Changeset 55
- Timestamp:
- 24.11.2006 00:14:09 (2 years ago)
- Files:
-
- trunk/devel/FuzzyOcr.cf (modified) (1 diff)
- trunk/devel/FuzzyOcr.pm (modified) (15 diffs)
- trunk/devel/FuzzyOcr/Config.pm (modified) (2 diffs)
- trunk/devel/FuzzyOcr/Deanimate.pm (modified) (3 diffs)
- trunk/devel/FuzzyOcr/Hashing.pm (modified) (2 diffs)
- trunk/devel/FuzzyOcr/Misc.pm (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/devel/FuzzyOcr.cf
 r54  r55
 130  130    # Value = 2 ... use digest_db w/digest_hash import
 131  131    # Value = 3 ... use mysql database
 132       - focr_enable_image_hashing 3
      132  + #focr_enable_image_hashing 3
 133  133    #
 134  134    # The score is saved with the hash in the database, so no extra scoring for a db hit is required.
trunk/devel/FuzzyOcr.pm
 r53  r55
  20   20
  21   21    use lib qw(.); # Allow placing of FuzzyOcr in siteconfigdir
  22       - use FuzzyOcr::Config qw(kill_pid get_tmpdir set_tmpdir get_pms save_pms get_timeout save_timeout get_ddb get_thresholds get_scansets get_config get_wordlist set_config finish_parsing_end load_global_words load_personal_words debuglog logfile);
       22  + use FuzzyOcr::Config qw(kill_pid get_tmpdir set_tmpdir get_pms save_pms get_timeout get_ddb get_thresholds get_scansets get_config get_wordlist set_config finish_parsing_end load_global_words load_personal_words debuglog logfile);
  23   23    use FuzzyOcr::Hashing qw(check_image_hash_db check_image_hash_db add_image_hash_db calc_image_hash);
  24   24    use FuzzyOcr::Deanimate qw(deanimate);
  25   25    use FuzzyOcr::Scoring qw(wrong_ctype corrupt_img known_img_hash);
  26       - use FuzzyOcr::Misc qw(max removedir );
  27       -
  28       - our @ISA = qw (Mail::SpamAssassin::Plugin);
       26  + use FuzzyOcr::Misc qw(max removedir save_execute);
       27  +
       28  + our @ISA = qw(Mail::SpamAssassin::Plugin);
  29   29
  30   30    # constructor: register the eval rule
… …
  49   49
  50   50    save_pms($pms);
  51       - my $t = get_timeout();
  52   51
  53   52    my $end;
… …
  62   61    if ($t->timed_out()) {
  63   62    debuglog("Scan timed out after $conf->{focr_timeout} seconds.");
  64       - debuglog("Killing possibly running pid...");
  65       - my ($ret, $pid) = kill_pid();
  66       - if ($ret > 0) {
  67       - debuglog("Successfully killed PID $pid");
  68       - } elsif ($ret < 0) {
  69       - debuglog("No processes left... exiting");
       63  + debuglog("Killing possibly running pid...");
       64  + my ($ret, $pid) = kill_pid();
       65  + if ($ret > 0) {
       66  + debuglog("Successfully killed PID $pid");
       67  + } elsif ($ret < 0) {
       68  + debuglog("No processes left... exiting");
  70   69    } else {
  71       - debuglog("Failed to kill PID $pid, stale process!");
  72       - }
       70  + debuglog("Failed to kill PID $pid, stale process!");
       71  + }
  73   72    return 0;
  74   73    }
… …
 142  141
 143  142    $imgdir = Mail::SpamAssassin::Util::secure_tmpdir();
 144       -
 145       - set_tmpdir($imgdir);
      143  +
      144  + set_tmpdir($imgdir);
 146  145
 147  146    unless ($imgdir) {
… …
 309  308    my @stderr_data;
 310  309
 311       - my ($retcode, @stdout_data) = save_execute("$conf->{focr_bin_giftext} $file", ">$imgdir/giftext.info", ">>$imgdir/giftext.err", 1);
      310  + my ($retcode, @stdout_data) = save_execute(
      311  + "$conf->{focr_bin_giftext} $file",
      312  + ">$imgdir/giftext.info",
      313  + ">>$imgdir/giftext.err", 1);
 312  314
 313  315    if ($retcode) {
… …
 334  336    $tfile .= "-fixed.gif";
 335  337    printf RAWERR "## $conf->{focr_bin_giffix} $file >$tfile 2>>$efile\n" if ($haserr>0);
 336       - $retcode = save_execute("$conf->{focr_bin_giffix} $file", ">$tfile", ">>$efile");
      338  + $retcode = save_execute("$conf->{focr_bin_giffix} $file", ">$tfile", ">>$efile");
 337  339
 338  340    if ($retcode) {
… …
 384  386    }
 385  387    printf RAWERR qq(## $conf->{focr_bin_gifinter} $cfile >$tfile 2>>$efile\n) if ($haserr>0);
 386       - $retcode = save_execute("$conf->{focr_bin_gifinter} $cfile", ">$tfile", ">>$efile");
      388  + $retcode = save_execute("$conf->{focr_bin_gifinter} $cfile", ">$tfile", ">>$efile");
 387  389
 388  390    if ($retcode) {
… …
 395  397
 396  398    printf RAWERR qq(## $conf->{focr_bin_giftopnm} $tfile >$pfile 2>>$efile\n) if ($haserr>0);
 397       - $retcode = save_execute("$conf->{focr_bin_giftopnm} $tfile", ">$pfile", ">>$efile");
      399  + $retcode = save_execute("$conf->{focr_bin_giftopnm} $tfile", ">$pfile", ">>$efile");
 398  400
 399  401    if ($retcode) {
… …
 425  427    }
 426  428    printf RAWERR qq(## $conf->{focr_bin_jpegtopnm} $file >$pfile 2>>$efile\n) if ($haserr>0);
 427       - $retcode = save_execute("$conf->{focr_bin_jpegtopnm} $file", ">$pfile", ">>$efile");
      429  + my $retcode = save_execute("$conf->{focr_bin_jpegtopnm} $file", ">$pfile", ">>$efile");
 428  430
 429  431    if ($retcode) {
… …
 455  457
 456  458    printf RAWERR qq(## $conf->{focr_bin_pngtopnm} $file >$pfile 2>>$efile\n) if ($haserr>0);
 457       - $retcode = save_execute("$conf->{focr_bin_pngtopnm} $file", ">$pfile", ">>$efile");
      459  + my $retcode = save_execute("$conf->{focr_bin_pngtopnm} $file", ">$pfile", ">>$efile");
 458  460
 459  461    if ($retcode) {
… …
 485  487    printf RAWERR qq(## $conf->{focr_bin_bmptopnm} $file >$pfile 2>>$efile\n) if ($haserr>0);
 486  488
 487       - $retcode = save_execute("$conf->{focr_bin_bmptopnm} $file", ">$pfile", ">>$efile");
      489  + my $retcode = save_execute("$conf->{focr_bin_bmptopnm} $file", ">$pfile", ">>$efile");
 488  490    if ($retcode) {
 489  491    chomp $retcode;
… …
 514  516    }
 515  517    printf RAWERR qq(## $conf->{focr_bin_tifftopnm} $file >$pfile 2>>$efile\n) if ($haserr>0);
 516       - $retcode = save_execute("$conf->{focr_bin_tifftopnm} $file", ">$pfile", ">>$efile");
      518  + my $retcode = save_execute("$conf->{focr_bin_tifftopnm} $file", ">$pfile", ">>$efile");
 517  519
 518  520    if ($retcode) {
… …
 571  573    #unlink $efile if (-e $efile);
 572  574    debuglog("Trying: $scanset");
 573       - my @ocrdata;
 574  575    printf RAWERR qq(## $scan 2>>$efile\n) if ($haserr>0);
 575  576
 576       - ($retcode, @ocrdata) = save_execute("$scan", ">$imgdir/ocr.temp", ">>$efile");
      577  + my ($retcode, @ocrdata) = save_execute("$scan", ">$imgdir/ocr.temp", ">>$efile",1);
 577  578    if ($retcode) {
 578  579    chomp $retcode;
… …
 587  588    }
 588  589
 589       - debuglog("ocrdata=>>".join("",@ocrdata)."<<=end")
 590       - if ($conf->{focr_verbose} > 2);
      590  + debuglog("ocrdata=>>".join("",@ocrdata)."<<=end") if ($conf->{focr_verbose}>2);
 591  591    push( @ocr_results, [@ocrdata] );
 592  592    push( @used_scansets, $scanset );
… …
 651  651    . "\n($cnt word occurrences found)" );
 652  652    if ($cnt >= $conf->{focr_counts_required}) {
 653       - $score = sprintf "%0.3f", $conf->{focr_base_score} + (( $cnt - $conf->{focr_counts_required} ) * $conf->{focr_add_score} );
      653  + $score = sprintf "%0.3f", $conf->{focr_base_score} +
      654  + (( $cnt - $conf->{focr_counts_required} ) * $conf->{focr_add_score} );
 654  655    debuglog("Message is spam, score = $score");
 655  656    } else {
trunk/devel/FuzzyOcr/Config.pm
 r53  r55
  81   81    sub set_pid {
  82   82    $pid = shift;
       83  + debuglog("Saved pid: $pid",2);
  83   84    }
  84   85
… …
  89   90    sub kill_pid {
  90   91    if ($pid) {
  91       - my $ret = kill POSIX::SIGTERM $pid;
  92       - # Wait for zombie process if the process is a zombie (i.e. SIGTERM didn't work)
  93       - wait();
  94       - return ($ret, $pid);
       92  + debuglog("Sending SIGTERM to pid: $pid",2);
       93  + my $ret = kill POSIX::SIGTERM $pid;
       94  + # Wait for zombie process if the process is a zombie (i.e. SIGTERM didn't work)
       95  + wait();
       96  + return ($ret, $pid);
  95   97    } else {
  96   98    return (-1, 0);
trunk/devel/FuzzyOcr/Deanimate.pm
 r51  r55
  33   33    if ($info->{'has_local_color_table'}) {
  34   34    debuglog("deanimate: Image has local_color_table, reducing to 255 colors");
  35       - my $retcode = save_execute("$conf->{focr_bin_gifsicle} --colors=255 $tfile", ">$tfile3", ">>$efile");
  36       - if ($retcode) {
  37       - debuglog("$conf->{'focr_bin_gifsicle'}: Timed out [$retcode], image not reduced!");
  38       - } else {
  39       - $tfile = $tfile3;
  40       - }
       35  + my $retcode = save_execute(
       36  + "$conf->{focr_bin_gifsicle} --colors=255 $tfile",
       37  + ">$tfile3",
       38  + ">>$efile");
       39  + if ($retcode) {
       40  + debuglog("$conf->{'focr_bin_gifsicle'}: Timed out [$retcode], image not reduced!");
       41  + } else {
       42  + $tfile = $tfile3;
       43  + }
  41   44    }
  42       - my $retcode = save_execute("$conf->{focr_bin_gifsicle} --unoptimize $tfile \'#$index\'", ">$tfile2", ">>$efile");
       45  + my $retcode = save_execute(
       46  + "$conf->{focr_bin_gifsicle} --unoptimize $tfile \'#$index\'",
       47  + ">$tfile2",
       48  + ">>$efile");
  43   49    if ($retcode) {
  44   50    debuglog("$conf->{focr_bin_gifsicle}: cannot extract image#${index}");
… …
  55   61    my $fd = new IO::Handle;
  56   62
       63  + my $retcode;
  57   64    my @stdout_data;
  58   65    my @stderr_data;
… …
  66   73    );
  67   74
  68       - my ($retcode, @stdout_data) = save_execute("$conf->{focr_bin_gifsicle} --info $giffile", ">$imgdir/gifsicle.info", ">>$imgdir/gifsicle.err", 1);
       75  + ($retcode, @stdout_data) = save_execute(
       76  + "$conf->{focr_bin_gifsicle} --info $giffile",
       77  + ">$imgdir/gifsicle.info",
       78  + ">>$imgdir/gifsicle.err", 1);
  69   79
  70   80    if ($retcode) {
trunk/devel/FuzzyOcr/Hashing.pm
 r51  r55
 332  332    }
 333  333
 334       - my ($r, @stdout_data) = save_execute("$conf->{focr_bin_ppmhist} -noheader $pfile", ">$imgdir/ppmhist.info", ">/dev/null", 1);
      334  + my ($r, @stdout_data) = save_execute(
      335  + "$conf->{focr_bin_ppmhist} -noheader $pfile",
      336  + ">$imgdir/ppmhist.info",
      337  + ">/dev/null", 1);
 335  338
 336  339    if ($r) {
… …
 342  345    my $cnt = 0;
 343  346    my $c = scalar(@stdout_data);
 344       - my $s = (stat($pfile))[7] ;
      347  + my $s = (stat($pfile))[7] || 0;
 345  348    $hash = sprintf "%d:%d:%d:%d",$s,
 346  349    defined $pic->{height} ? $pic->{height} : 0,
trunk/devel/FuzzyOcr/Misc.pm
 r51  r55
  42   42    my $t = get_timeout();
  43   43    my ($cmd, $stdout, $stderr, $return_stdout) = @_;
       44  + $stdout = '>/dev/null' unless $stdout;
       45  + $stderr = '>/dev/null' unless $stderr;
       46  + debuglog("Exec: $cmd $stdout $stderr");
  44   47    my $retcode;
  45   48    if ($conf->{'focr_global_timeout'}) {
  46   49    my $pid = fork();
  47       - if (not defined $pid) {
       50  + if (not defined $pid) {
  48   51    debuglog("Can't fork to execute external programs! Aborting");
  49   52    return -1;
  50       - } elsif (not $pid) {
       53  + } elsif (not $pid) {
  51   54    open(STDOUT, $stdout);
  52       - open(STDERR, $stderr);
  53       - exec($cmd);
  54       - exit($?);
  55       - } else {
  56       - set_pid($pid);
  57       - wait();
  58       - unset_pid();
  59       - $retcode = $?;
  60       - if ($return_stdout) {
  61       - $stdout =~ tr/>|</ /;
  62       - open(INFILE, "<$stdout");
  63       - my @stdout_data = <INFILE>;
  64       - close(INFILE);
  65       - return($retcode, @stdout_data);
  66       - }
  67       - return $retcode;
  68       - }
       55  + open(STDERR, $stderr);
       56  + exec($cmd);
       57  + exit($?);
       58  + } else {
       59  + set_pid($pid);
       60  + wait(); $retcode = $?>>8;
       61  + unset_pid();
       62  + if ($return_stdout and $stdout !~ m,/dev/null,i) {
       63  + $stdout =~ tr/>|</ /;
       64  + open(INFILE, "<$stdout");
       65  + my @stdout_data = <INFILE>;
       66  + close(INFILE);
       67  + return($retcode, @stdout_data);
       68  + }
       69  + return $retcode;
       70  + }
  69   71    } else {
  70   72    my @stdout_data;
  71       - my $pid;
       73  + my $pid;
  72   74    $t->run_and_catch(sub {
  73       - $pid = fork();
  74       - if (not defined $pid) {
  75       - debuglog("Can't fork to execute external programs! Aborting");
  76       - return -1;
  77       - } elsif (not $pid) {
  78       - open(STDOUT, $stdout);
  79       - open(STDERR, $stderr);
  80       - exec($cmd);
  81       - exit($?);
  82       - } else {
  83       - set_pid($pid);
  84       - wait();
  85       - unset_pid();
  86       - $retcode = $?;
  87       - if ($return_stdout) {
  88       - $stdout =~ tr/>|</ /;
  89       - open(INFILE, "<$stdout");
  90       - @stdout_data = <INFILE>;
  91       - close(INFILE);
  92       - }
  93       - }
  94       - });
  95       - if ($t->timed_out()) {
       75  + $pid = fork();
       76  + if (not defined $pid) {
       77  + debuglog("Can't fork to execute external programs! Aborting");
       78  + return -1;
       79  + } elsif (not $pid) {
       80  + open(STDOUT, $stdout);
       81  + open(STDERR, $stderr);
       82  + exec($cmd);
       83  + exit($?);
       84  + } else {
       85  + set_pid($pid);
       86  + wait(); $retcode = $?>>8;
       87  + unset_pid();
       88  + if ($return_stdout and $stdout !~ m,/dev/null,i) {
       89  + $stdout =~ tr/>|</ /;
       90  + open(INFILE, "<$stdout");
       91  + @stdout_data = <INFILE>;
       92  + close(INFILE);
       93  + }
       94  + }
       95  + });
       96  + if ($t->timed_out()) {
  96   97    debuglog("Command \"$cmd\" timed out after $conf->{focr_timeout} seconds.");
  97       - debuglog("Consider decreasing your load and/or increasing the timeout.");
  98       - debuglog("Killing possibly running pid...");
  99       - my ($ret, $pid) = kill_pid();
 100       - if ($ret > 0) {
 101       - debuglog("Successfully killed PID $pid");
 102       - } elsif ($ret < 0) {
 103       - debuglog("No processes left... this shouldn't happen...");
       98  + debuglog("Consider decreasing your load and/or increasing the timeout.");
       99  + debuglog("Killing possibly running pid...");
      100  + my ($ret, $pid) = kill_pid();
      101  + if ($ret > 0) {
      102  + debuglog("Successfully killed PID $pid");
      103  + } elsif ($ret < 0) {
      104  + debuglog("No processes left... this shouldn't happen...");
 104  105    } else {
 105       - debuglog("Failed to kill PID $pid, possibly stale process");
 106       - }
      106  + debuglog("Failed to kill PID $pid, possibly stale process");
      107  + }
 107  108    return -1;
 108       - } else {
 109       - if ($return_stdout) {
 110       - return($retcode, @stdout_data);
 111       - } else {
 112       - return $retcode;
 113       - }
 114       - }
      109  + } else {
      110  + if ($return_stdout) {
      111  + return($retcode, @stdout_data);
      112  + } else {
      113  + return $retcode;
      114  + }
      115  + }
 115  116    }
 116  117    }
