| 9 | | my %App = ( |
|---|
| 10 | | identify => '/usr/bin/identify', |
|---|
| 11 | | ppmhist => '/usr/local/netpbm/bin/ppmhist', |
|---|
| 12 | | jpegtopnm => '/usr/local/netpbm/bin/jpegtopnm', |
|---|
| 13 | | giftopnm => '/usr/local/netpbm/bin/giftopnm', |
|---|
| 14 | | pngtopnm => '/usr/local/netpbm/bin/pngtopnm', |
|---|
| 15 | | bmptopnm => '/usr/local/netpbm/bin/bmptopnm', |
|---|
| 16 | | ); |
|---|
# Defaults: the name of the configuration file and, for every helper binary
# this script shells out to, a path under /usr/bin.
my $cfgfile   = "FuzzyOcr.cf";
my @bin_utils = qw/pamfile ppmhist jpegtopnm giftopnm pngtopnm bmptopnm/;
my %App       = map { ($_ => "/usr/bin/$_") } @bin_utils;
|---|
| | 27 | |
|---|
# Called without any arguments: print the usage summary to STDOUT and exit
# with status 1.
if (!@ARGV) {
    print "Usage: fuzzy-find.pl [Options] (imagehash|imagefile) \n",
          "\n",
          "Available options:\n",
          "--delete Removes the hash from the database\n",
          "--learn-ham Add the hash as ham to the database\n",
          "--learn-spam Add the hash as spam to the database\n",
          "--verbose Show more informations\n",
          "\n";
    exit 1;
}
|---|
| | 39 | |
|---|
# Read custom helper paths from FuzzyOcr.cf.
#
# Every line of the form "focr_bin_<name> <path>" replaces the entry
# $App{<name>}; all other lines are ignored. If the file cannot be opened,
# a warning is issued and the existing %App entries are kept. The handle is
# only read and closed when the open actually succeeded.
if (open my $config, '<', $cfgfile) {
    while (my $line = <$config>) {
        chomp $line;
        next unless $line =~ m/^focr_bin_(\w+) (.+)/;
        $App{$1} = $2;
        # print, not printf: the message contains the configured path, and a
        # '%' in it must not be interpreted as a format directive.
        print "Found custom path \"$2\" for application \"$1\"\n" if $verbose;
    }
    close $config;
} else {
    warn "Can't read configuration file, using defaults...\n";
}
|---|
| 38 | | my $res = `/usr/bin/identify $file`; |
|---|
# Choose the to-PNM converter and the content type from the file extension
# (matched case-insensitively). A .pnm file needs no conversion, so it is
# simply passed through /bin/cat. Any other extension aborts with status 1.
my %converter_for = (
    jpg  => [ $App{jpegtopnm}, "image/jpeg" ],
    jpeg => [ $App{jpegtopnm}, "image/jpeg" ],
    png  => [ $App{pngtopnm},  "image/png"  ],
    bmp  => [ $App{bmptopnm},  "image/bmp"  ],
    pnm  => [ '/bin/cat',      "image/pnm"  ],
    gif  => [ $App{giftopnm},  "image/gif"  ],
);

my $app;
if ($file =~ m/\.(jpe?g|png|bmp|pnm|gif)$/i) {
    ($app, $ctype) = @{ $converter_for{lc $1} };
} else {
    print "Unknown extension given in \"$file\", aborting...\n";
    exit 1;
}

# Both pipelines below are run by the shell. Single-quote the file name
# (turning each embedded ' into '\'') so that names containing spaces,
# quotes or shell metacharacters reach the converter as one literal argument.
(my $quoted_file = $file) =~ s/'/'\\''/g;
$quoted_file = "'$quoted_file'";

# Colour histogram of the converted image, and the pamfile description of it.
my @hist = `$app $quoted_file |$App{ppmhist} -noheader -`;
my @res  = `$app $quoted_file |$App{pamfile} -`;
|---|
| | 107 | if ($learn_spam || $learn_ham) { |
|---|
| | 108 | my %DB; |
|---|
| | 109 | my $ff = $learn_spam ? 'db_hash' : 'db_safe'; |
|---|
| | 110 | tie %DB, 'MLDBM', $Files{$ff} or die "Can't open $ff"; |
|---|
| | 111 | print "Adding key to database...\n"; |
|---|
| | 112 | if (defined $key) { |
|---|
| | 113 | my $dbm = $DB{$key}; |
|---|
| | 114 | $dbm->{fname} = $file; |
|---|
| | 115 | $dbm->{ctype} = $ctype; |
|---|
| | 116 | $dbm->{dinfo} = "Manually added to the database\n"; |
|---|
| | 117 | $dbm->{basic} = join(':', @data); |
|---|
| | 118 | $dbm->{score} = $learn_spam ? 10 : 0; |
|---|
| | 119 | $dbm->{input} = |
|---|
| | 120 | $dbm->{check} = time; |
|---|
| | 121 | $dbm->{match} = $learn_spam ? 0 : 1; |
|---|
| | 122 | $DB{$key} = $dbm; |
|---|
| | 123 | } |
|---|
| | 124 | untie %DB; |
|---|
| | 125 | exit 0; |
|---|
| | 126 | } else { |
|---|