Changeset 42
- Timestamp:
- 20.11.2006 18:54:51 (2 years ago)
- Files:
-
- trunk/devel/FuzzyOcr.pm (modified) (29 diffs)
- trunk/devel/FuzzyOcr/Config.pm (modified) (8 diffs)
- trunk/devel/FuzzyOcr/Hashing.pm (modified) (15 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/devel/FuzzyOcr.pm
r41 r42 49 49 save_pms($pms); 50 50 51 if ( $pms->get_score() > $conf->{ 'focr_autodisable_score'} ) {52 debuglog("Scan canceled, message has already more than $conf->{ 'focr_autodisable_score'} points.");51 if ( $pms->get_score() > $conf->{focr_autodisable_score} ) { 52 debuglog("Scan canceled, message has already more than $conf->{focr_autodisable_score} points."); 53 53 return 0; 54 54 } … … 63 63 #debuglog("Starting FuzzyOcr..."); 64 64 #debuglog("Attempting to load personal wordlist..."); 65 if ($conf->{ "focr_personal_wordlist"} =~ m/^\//) {66 load_personal_words( $conf->{ "focr_personal_wordlist"} );65 if ($conf->{focr_personal_wordlist} =~ m/^\//) { 66 load_personal_words( $conf->{focr_personal_wordlist} ); 67 67 } else { 68 68 my $homedir = (getpwuid($<))[7]; 69 69 if ($homedir) { 70 load_personal_words( $homedir . "/$conf->{ 'focr_personal_wordlist'}" );70 load_personal_words( $homedir . "/$conf->{focr_personal_wordlist}" ); 71 71 } elsif (defined($ENV{HOME})) { 72 load_personal_words( $ENV{HOME} . "/$conf->{ 'focr_personal_wordlist'}" );72 load_personal_words( $ENV{HOME} . "/$conf->{focr_personal_wordlist}" ); 73 73 } else { 74 74 debuglog("Variable \$ENV{HOME} not defined and getpwuid failed, personal wordlist function not available..."); … … 146 146 if ($b != 0xff) { 147 147 debuglog("Invalid JPEG image"); 148 $pos = $pdatalen + 1; 148 149 last; 149 150 } … … 161 162 } else { 162 163 ($h,$w) = unpack("nn",substr($pdata,$pos+3,4)); 163 }164 debuglog("JPEG: $imgfilename '${h}x${w}'");165 $imgfiles{$imgfilename}{ftype} = 2;166 $imgfiles{$imgfilename}{height} = $h;167 $imgfiles{$imgfilename}{width} = $w;164 debuglog("JPEG: $imgfilename '${h}x${w}'"); 165 $imgfiles{$imgfilename}{ftype} = 2; 166 $imgfiles{$imgfilename}{height} = $h; 167 $imgfiles{$imgfilename}{width} = $w; 168 } 168 169 } elsif ( substr($pdata,0,4) eq "\x89\x50\x4e\x47" ) { 169 170 # PNG File … … 201 202 $imgfiles{$imgfilename}{height} = $h ? $h : 1; 202 203 } 204 next unless defined $imgfiles{$imgfilename}{ftype}; 203 205 $imgfiles{$imgfilename}{fname} = $fname; 204 206 $imgfiles{$imgfilename}{ctype} = $ctype; … … 218 220 if ($cnt == 0) { 219 221 debuglog("Skipping OCR, no image files found..."); 220 removedir($imgdir) if (defined($imgdir) and ($conf->{ "focr_keep_bad_images"}<2));222 removedir($imgdir) if (defined($imgdir) and ($conf->{focr_keep_bad_images}<2)); 221 223 return 0; 222 224 } 223 225 debuglog("Found: $cnt images"); $cnt = 0; 224 my $t = Mail::SpamAssassin::Timeout->new({ secs => $conf->{ "focr_timeout"} });226 my $t = Mail::SpamAssassin::Timeout->new({ secs => $conf->{focr_timeout} }); 225 227 my $retcode; 226 228 … … 247 249 next IMAGE; 248 250 } 249 if (defined($conf->{ "focr_max_size_gif"}) and ($$pic{fsize} > $conf->{"focr_max_size_gif"})) {250 debuglog("GIF file size ($ s) exceeds maximum file size for this format, skipping...");251 if (defined($conf->{focr_max_size_gif}) and ($$pic{fsize} > $conf->{focr_max_size_gif})) { 252 debuglog("GIF file size ($$pic{fsize}) exceeds maximum file size for this format, skipping..."); 251 253 next; 252 254 } … … 268 270 269 271 $retcode = $t->run_and_catch(sub { 270 @stdout_data = qx($conf->{ "focr_bin_giftext"} $file);272 @stdout_data = qx($conf->{focr_bin_giftext} $file); 271 273 }); 272 274 if ($retcode) { 273 275 chomp $retcode; 274 debuglog("$conf->{ 'focr_bin_giftext'} Timed out [$retcode], skipping...");275 ++$imgerr if $conf->{ "focr_keep_bad_images"}>0; next;276 debuglog("$conf->{focr_bin_giftext} Timed out [$retcode], skipping..."); 277 ++$imgerr if $conf->{focr_keep_bad_images}>0; next; 276 278 } 277 279 foreach (@stdout_data) { … … 291 293 debuglog("Image is single non-interlaced..."); 292 294 $tfile .= "-fixed.gif"; 293 printf RAWERR "## $conf->{ 'focr_bin_giffix'} $file >$tfile 2>>$efile\n" if ($haserr>0);295 printf RAWERR "## $conf->{focr_bin_giffix} $file >$tfile 2>>$efile\n" if ($haserr>0); 294 296 $retcode = $t->run_and_catch(sub { 295 qx($conf->{ "focr_bin_giffix"} $file >$tfile 2>>$efile);297 qx($conf->{focr_bin_giffix} $file >$tfile 2>>$efile); 296 298 }); 297 299 if ($retcode) { 298 300 chomp $retcode; 299 debuglog("$conf->{ 'focr_bin_giffix'}: Timed out [$retcode], skipping...");301 debuglog("$conf->{focr_bin_giffix}: Timed out [$retcode], skipping..."); 300 302 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0); 301 ++$imgerr if $conf->{ "focr_keep_bad_images"}>0; next;303 ++$imgerr if $conf->{focr_keep_bad_images}>0; next; 302 304 } 303 305 if (open ERR, $efile) { … … 316 318 if ($interlaced_gif or ($image_count > 1)) { 317 319 debuglog("Skipping corrupted interlaced image..."); 318 corrupt_img($conf->{ 'focr_corrupt_unfixable_score'}, $corrupt);320 corrupt_img($conf->{focr_corrupt_unfixable_score}, $corrupt); 319 321 next; 320 322 } 321 323 if (-z $tfile) { 322 324 debuglog("Uncorrectable corruption detected, skipping non-interlaced image..."); 323 corrupt_img($conf->{ 'focr_corrupt_unfixable_score'}, $corrupt);325 corrupt_img($conf->{focr_corrupt_unfixable_score}, $corrupt); 324 326 next; 325 327 } 326 328 debuglog("Image is corrupt, but seems fixable, continuing..."); 327 corrupt_img($conf->{ 'focr_corrupt_score'}, $corrupt);329 corrupt_img($conf->{focr_corrupt_score}, $corrupt); 328 330 } 329 331 … … 341 343 $tfile .= ".gif"; 342 344 } 343 printf RAWERR qq(## $conf->{ 'focr_bin_gifinter'} $cfile >$tfile 2>>$efile\n) if ($haserr>0);345 printf RAWERR qq(## $conf->{focr_bin_gifinter} $cfile >$tfile 2>>$efile\n) if ($haserr>0); 344 346 $retcode = $t->run_and_catch(sub{ 345 qx($conf->{ "focr_bin_gifinter"} $cfile >$tfile 2>>$efile);347 qx($conf->{focr_bin_gifinter} $cfile >$tfile 2>>$efile); 346 348 }); 347 349 if ($retcode) { 348 350 chomp $retcode; 349 debuglog("$conf->{'focr_bin_gifinter'}: Timed out [$retcode], skipping...");350 351 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0); 351 ++$imgerr if $conf->{"focr_keep_bad_images"}>0; next; 352 } 353 } 354 355 printf RAWERR qq(## $conf->{'focr_bin_giftopnm'} $tfile >$pfile 2>>$efile\n) if ($haserr>0); 352 debuglog("$conf->{focr_bin_gifinter}: Timed out [$retcode], skipping..."); 353 ++$imgerr if $conf->{focr_keep_bad_images}>0; next; 354 } 355 } 356 357 printf RAWERR qq(## $conf->{focr_bin_giftopnm} $tfile >$pfile 2>>$efile\n) if ($haserr>0); 356 358 $retcode = $t->run_and_catch(sub { 357 qx($conf->{ "focr_bin_giftopnm"} $tfile >$pfile 2>>$efile);359 qx($conf->{focr_bin_giftopnm} $tfile >$pfile 2>>$efile); 358 360 }); 359 361 if ($retcode) { 360 362 chomp $retcode; 361 debuglog("$conf->{'focr_bin_giftopnm'}: Timed out [$retcode], skipping...");362 363 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0); 363 ++$imgerr if $conf->{"focr_keep_bad_images"}>0; next; 364 debuglog("$conf->{focr_bin_giftopnm}: Timed out [$retcode], skipping..."); 365 ++$imgerr if $conf->{focr_keep_bad_images}>0; next; 364 366 } 365 367 } … … 371 373 } 372 374 373 if (defined($conf->{ "focr_max_size_jpeg"}) and ($$pic{fsize} > $conf->{"focr_max_size_jpeg"})) {374 debuglog("JPEG file size ($ s) exceeds maximum file size for this format, skipping...");375 if (defined($conf->{focr_max_size_jpeg}) and ($$pic{fsize} > $conf->{focr_max_size_jpeg})) { 376 debuglog("JPEG file size ($$pic{fsize}) exceeds maximum file size for this format, skipping..."); 375 377 next; 376 378 } … … 384 386 } 385 387 } 386 printf RAWERR qq(## $conf->{ 'focr_bin_jpegtopnm'} $file >$pfile 2>>$efile\n) if ($haserr>0);388 printf RAWERR qq(## $conf->{focr_bin_jpegtopnm} $file >$pfile 2>>$efile\n) if ($haserr>0); 387 389 $retcode = $t->run_and_catch(sub { 388 qx($conf->{ "focr_bin_jpegtopnm"} $file >$pfile 2>>$efile);390 qx($conf->{focr_bin_jpegtopnm} $file >$pfile 2>>$efile); 389 391 }); 390 392 if ($retcode) { 391 393 chomp $retcode; 392 debuglog("$conf->{'focr_bin_jpegtopnm'}: Timed out [$retcode], skipping..."); 393 ++$imgerr if $conf->{"focr_keep_bad_images"}>0; next; 394 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0); 395 debuglog("$conf->{focr_bin_jpegtopnm}: Timed out [$retcode], skipping..."); 396 ++$imgerr if $conf->{focr_keep_bad_images}>0; next; 394 397 } 395 398 } … … 400 403 next IMAGE; 401 404 } 402 if (defined($conf->{ "focr_max_size_png"}) and ($$pic{fsize} > $conf->{"focr__max_size_png"})) {403 debuglog("PNG file size ($ s) exceeds maximum file size for this format, skipping...");405 if (defined($conf->{focr_max_size_png}) and ($$pic{fsize} > $conf->{focr__max_size_png})) { 406 debuglog("PNG file size ($$pic{fsize}) exceeds maximum file size for this format, skipping..."); 404 407 next; 405 408 } … … 413 416 } 414 417 } 415 printf RAWERR qq(## $conf->{ 'focr_bin_pngtopnm'} $file >$pfile 2>>$efile\n) if ($haserr>0);418 printf RAWERR qq(## $conf->{focr_bin_pngtopnm} $file >$pfile 2>>$efile\n) if ($haserr>0); 416 419 $retcode = $t->run_and_catch(sub { 417 qx($conf->{ "focr_bin_pngtopnm"} $file >$pfile 2>>$efile);420 qx($conf->{focr_bin_pngtopnm} $file >$pfile 2>>$efile); 418 421 }); 419 422 if ($retcode) { 420 423 chomp $retcode; 421 debuglog("$conf->{'focr_bin_pngtopnm'}: Timed out [$retcode], skipping...");422 424 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0); 423 ++$imgerr if $conf->{"focr_keep_bad_images"}>0; next; 425 debuglog("$conf->{focr_bin_pngtopnm}: Timed out [$retcode], skipping..."); 426 ++$imgerr if $conf->{focr_keep_bad_images}>0; next; 424 427 } 425 428 } … … 430 433 next IMAGE; 431 434 } 432 if (defined($conf->{ "focr_max_size_bmp"}) and ($$pic{fsize} > $conf->{"focr_max_size_bmp"})) {433 debuglog("BMP file size ($ s) exceeds maximum file size for this format, skipping...");435 if (defined($conf->{focr_max_size_bmp}) and ($$pic{fsize} > $conf->{focr_max_size_bmp})) { 436 debuglog("BMP file size ($$pic{fsize}) exceeds maximum file size for this format, skipping..."); 434 437 next; 435 438 } … … 443 446 } 444 447 } 445 printf RAWERR qq(## $conf->{ 'focr_bin_bmptopnm'} $file >$pfile 2>>$efile\n) if ($haserr>0);448 printf RAWERR qq(## $conf->{focr_bin_bmptopnm} $file >$pfile 2>>$efile\n) if ($haserr>0); 446 449 $retcode = $t->run_and_catch(sub { 447 450 qx($conf->{"focr_bin_bmptopnm"} $file >$pfile 2>>$efile); … … 449 452 if ($retcode) { 450 453 chomp $retcode; 451 debuglog("$conf->{'focr_bin_bmptopnm'}: Timed out [$retcode], skipping...");452 454 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0); 453 ++$imgerr if $conf->{"focr_keep_bad_images"}>0; next; 455 debuglog("$conf->{focr_bin_bmptopnm}: Timed out [$retcode], skipping..."); 456 ++$imgerr if $conf->{focr_keep_bad_images}>0; next; 454 457 } 455 458 } … … 460 463 next IMAGE; 461 464 } 462 if (defined($conf->{ "focr_max_size_tiff"}) and ($$pic{fsize} > $conf->{"focr_max_size_tiff"})) {463 debuglog("TIFF file size ($ s) exceeds maximum file size for this format, skipping...");465 if (defined($conf->{focr_max_size_tiff}) and ($$pic{fsize} > $conf->{focr_max_size_tiff})) { 466 debuglog("TIFF file size ($$pic{fsize}) exceeds maximum file size for this format, skipping..."); 464 467 next; 465 468 } … … 468 471 } 469 472 470 printf RAWERR qq(## $conf->{'focr_bin_tifftopnm'} $file >$pfile 2>>$efile\n) if ($haserr>0); 473 foreach my $a (qw/tifftopnm/) { 474 unless (defined $conf->{"focr_bin_$a"}) { 475 debuglog("Cannot exec $a, skipping image"); 476 next IMAGE; 477 } 478 } 479 printf RAWERR qq(## $conf->{focr_bin_tifftopnm} $file >$pfile 2>>$efile\n) if ($haserr>0); 471 480 $retcode = $t->run_and_catch(sub { 472 481 qx($conf->{'focr_bin_tifftopnm'} $file >$pfile 2>>$efile); … … 474 483 if ($retcode) { 475 484 chomp $retcode; 476 debuglog("$conf->{'focr_bin_tifftopnm'}: Timed out [$retcode], skipping...");477 485 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0); 478 ++$imgerr if $conf->{"focr_keep_bad_images"}>0; next; 486 debuglog("$conf->{focr_bin_tifftopnm}: Timed out [$retcode], skipping..."); 487 ++$imgerr if $conf->{focr_keep_bad_images}>0; next; 479 488 } 480 489 } … … 484 493 } 485 494 486 if($conf->{ "focr_enable_image_hashing"}) {495 if($conf->{focr_enable_image_hashing}) { 487 496 debuglog("Calculating the image hash: $pfile"); 488 ($corrupt, $digest) = calc_image_hash($pfile );497 ($corrupt, $digest) = calc_image_hash($pfile,$pic); 489 498 if ($corrupt) { 490 499 debuglog("Error calculating the image hash, skipping hash check..."); … … 497 506 if ($score > 0) { 498 507 known_img_hash($score,$dinfo); 499 debuglog("Message is SPAM. $dinfo") ;508 debuglog("Message is SPAM. $dinfo") if ($conf->{focr_enable_image_hashing} < 3); 500 509 removedir($imgdir); 501 510 return 0; … … 521 530 foreach my $scanset (@$scansets) { 522 531 my $scan = $scanset; 523 $scan =~ s/\$gocr/$conf->{ "focr_bin_gocr"}/;524 $scan =~ s/\$ocrad/$conf->{ "focr_bin_ocrad"}/;532 $scan =~ s/\$gocr/$conf->{focr_bin_gocr}/; 533 $scan =~ s/\$ocrad/$conf->{focr_bin_ocrad}/; 525 534 $scan =~ s/\$pfile/$pfile/; 526 535 $scan =~ s/\$efile/$efile/g; … … 577 586 $cnt += $wcnt; 578 587 $mcnt += $wcnt; 579 if ( ( $conf->{ "focr_verbose"} > 0 ) and ($wcnt) ) {588 if ( ( $conf->{focr_verbose} > 0 ) and ($wcnt) ) { 580 589 push( @found, "\"$w\" in $wcnt lines" ); 581 590 } 582 591 } 583 if ($conf->{ "focr_enable_image_hashing"}) {584 my $info = join('::',$mcnt,$$pic{fname},$$pic{ctype},$ digest);592 if ($conf->{focr_enable_image_hashing}) { 593 my $info = join('::',$mcnt,$$pic{fname},$$pic{ctype},$$pic{ftype},$digest); 585 594 push(@hashes, $info); 586 595 } … … 589 598 590 599 if ($cnt == 0) { 591 if ($conf->{ "focr_enable_image_hashing"} > 1 and @hashes) {600 if ($conf->{focr_enable_image_hashing} > 1 and @hashes) { 592 601 debuglog("Message is ham, saving..."); 593 602 foreach my $h (@hashes) { … … 606 615 . join( "\n", @found ) 607 616 . "\n($cnt word occurrences found)" ); 608 if ($cnt >= $conf->{ "focr_counts_required"}) {609 $score = sprintf "%0.3f", $conf->{ 'focr_base_score'} + (( $cnt - $conf->{"focr_counts_required"} ) * $conf->{'focr_add_score'} );617 if ($cnt >= $conf->{focr_counts_required}) { 618 $score = sprintf "%0.3f", $conf->{focr_base_score} + (( $cnt - $conf->{focr_counts_required} ) * $conf->{focr_add_score} ); 610 619 debuglog("Message is spam, score = $score"); 611 620 } else { 612 $score = sprintf("%0.3f", $conf->{ 'focr_add_score'} * $cnt) if $conf->{"focr_score_ham"};621 $score = sprintf("%0.3f", $conf->{focr_add_score} * $cnt) if $conf->{focr_score_ham}; 613 622 debuglog("Message is ham, score = $score"); 614 623 } 615 if ($conf->{ "focr_enable_image_hashing"} and616 $conf->{ "focr_hashing_learn_scanned"} and624 if ($conf->{focr_enable_image_hashing} and 625 $conf->{focr_hashing_learn_scanned} and 617 626 $score > 0) { 618 627 foreach my $h (@hashes) { … … 625 634 } 626 635 } 627 if ( $conf->{ "focr_verbose"} > 0) {636 if ( $conf->{focr_verbose} > 0 and $conf->{focr_verbose} < 3 ) { 628 637 debuglog($debuginfo); 629 638 } … … 634 643 $pms->{conf}->{descriptions}->{FUZZY_OCR} . "\n$debuginfo" ); 635 644 } 636 if ($imgerr == 0 and $conf->{ "focr_keep_bad_images"}<2) {645 if ($imgerr == 0 and $conf->{focr_keep_bad_images}<2) { 637 646 removedir($imgdir); 638 647 } trunk/devel/FuzzyOcr/Config.pm
r41 r42 125 125 return $Mail::SpamAssassin::Conf::INVALID_VALUE; 126 126 } 127 $self->{ 'focr_verbose'} = $value+0;127 $self->{focr_verbose} = $value+0; 128 128 } 129 129 }); … … 151 151 return $Mail::SpamAssassin::Conf::INVALID_VALUE; 152 152 } 153 $self->{ 'focr_enable_image_hashing'} = $value+0;153 $self->{focr_enable_image_hashing} = $value+0; 154 154 } 155 155 }); … … 208 208 return $Mail::SpamAssassin::Conf::INVALID_VALUE; 209 209 } 210 $self->{ 'focr_keep_bad_images'} = $value+0;210 $self->{focr_keep_bad_images} = $value+0; 211 211 } 212 212 }); … … 281 281 push (@cmds, { 282 282 setting => 'focr_scansets', 283 default => '$gocr -i $pfile, $gocr -l 180 -d 2 -i $pfile, $ gocr -l 140 -d 2 -i$pfile',283 default => '$gocr -i $pfile, $gocr -l 180 -d 2 -i $pfile, $ocrad -s5 -T 0.5 $pfile', 284 284 type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING 285 285 }); … … 334 334 my ($self, $opts) = @_; 335 335 $conf = $opts->{conf}; 336 337 # Use specified scansets338 @scansets = split(/,\s*/, $conf->{'focr_scansets'});339 debuglog("Using scan: $_") foreach @scansets;340 336 341 337 # find external binaries … … 360 356 } 361 357 } 358 359 # Use specified scansets 360 @scansets = split(/,\s*/, $conf->{'focr_scansets'}); 361 foreach my $scan (@scansets) { 362 if ($scan =~ m/(gocr|ocrad)/) { 363 next unless -x $conf->{"focr_bin_$1"}; 364 } 365 debuglog("Using scan: $scan"); 366 } 367 362 368 if ($conf->{focr_enable_image_hashing} == 2 and -r $conf->{focr_digest_db}) { 363 369 my %DB; my $dbm; my $err = 0; 364 370 my $now = time - ($conf->{focr_db_max_days}*86400); 371 debuglog($conf->{focr_db_hash}); 365 372 tie %DB, 'MLDBM', $conf->{focr_db_hash} or $err++; 366 373 if ($err) { … … 571 578 $logtext =~ s/\n/\n /g; 572 579 573 unless ( open LOGFILE, ">>", $conf->{ 'focr_logfile'} ) {574 warn "Can't open $conf->{ 'focr_logfile'} for writing, check permissions";580 unless ( open LOGFILE, ">>", $conf->{focr_logfile} ) { 581 warn "Can't open $conf->{focr_logfile} for writing, check permissions"; 575 582 } 576 583 flock( LOGFILE, LOCK_EX ); … … 586 593 my $limit = defined $_[1] ? $_[1] : 1; 587 594 foreach (@lines) { dbg ("FuzzyOcr: $_"); } 588 if ( $conf->{ "focr_verbose"} > $limit ) {589 logfile($_[0]) if defined $conf->{ "focr_logfile"};595 if ( $conf->{focr_verbose} > $limit ) { 596 logfile($_[0]) if defined $conf->{focr_logfile}; 590 597 } 591 598 } trunk/devel/FuzzyOcr/Hashing.pm
r41 r42 78 78 my $db = $conf->{focr_mysql_db}; 79 79 my $sql = qq(select * from $db.$dbfile where $dbfile.key='$key'); 80 debuglog($sql);81 80 my @data = $ddb->selectrow_array($sql); 82 81 my $next = 0; 83 82 my $when = 0; 84 83 if (scalar(@data)>0) { 85 debuglog("Found : Score='$data[8]' Info: '$data[9]'");84 debuglog("Found[$dbfile]: Score='$data[8]' Info: '$data[9]'"); 86 85 $next = $data[5]; $next++; 87 $when = $data[ 6]; $data[8] += 0;86 $when = $data[7]; $data[8] += 0; 88 87 $ret = $data[8] == 0 ? 0.001 : $data[8]; 89 88 $dinfo = $data[9] || ''; … … 101 100 } 102 101 } else { 103 my $then = time - ($conf->{ "focr_db_max_days"}*86400);102 my $then = time - ($conf->{focr_db_max_days}*86400); 104 103 $sql = qq(select * from $db.$dbfile); 105 104 my $sth = $ddb->prepare($sql); $sth->execute; … … 112 111 $next = $row[5] + 1; 113 112 $when = $row[7] || $now; 114 $ret = $dbfile eq $conf->{ "focr_mysql_hash"} ? $row[8] : $row[5];113 $ret = $dbfile eq $conf->{focr_mysql_hash} ? $row[8] : $row[5]; 115 114 $dinfo = $row[9] || ''; 116 debuglog("Found in Table:'$dbfile'");115 debuglog("Found[$dbfile]: Score='$row[8]' Info: '$row[9]'"); 117 116 last; 118 117 } … … 124 123 } 125 124 if ($ret > 0) { 126 if ($dbfile eq $conf->{ "focr_mysql_hash"}) {125 if ($dbfile eq $conf->{focr_mysql_hash}) { 127 126 debuglog("Found Score <$ret> for $txt Image Hash"); 128 127 } 129 debuglog("Matched [$next] time(s). Prev match: ".fmt_time($ when));130 $sql = qq(update $ conf->{focr_mysql_db}.$dbfile set $dbfile.match='$next',$dbfile.check='$now' where $dbfile.key='$key');128 debuglog("Matched [$next] time(s). Prev match: ".fmt_time($now - $when)); 129 $sql = qq(update $db.$dbfile set $dbfile.match='$next',$dbfile.check='$now' where $dbfile.key='$key'); 131 130 debuglog($sql); 132 131 $ddb->do($sql); … … 134 133 return ($ret,$dinfo); 135 134 } 136 elsif ($conf->{ "focr_enable_image_hashing"} == 2) {135 elsif ($conf->{focr_enable_image_hashing} == 2) { 137 136 my %DB = (); my $dbm; 138 137 tie %DB, 'MLDBM', $dbfile, O_RDWR or $ret++; … … 153 152 } 154 153 if ($ret == 0) { 155 my $then = time - ($conf->{ "focr_db_max_days"}*86400);154 my $then = time - ($conf->{focr_db_max_days}*86400); 156 155 foreach my $k (keys %DB) { 157 156 $dbm = $DB{$k}; 158 157 $hash = $dbm->{basic} ? $dbm->{basic} : "0:0:0:0::$k"; 159 158 if (within_threshold($digest,$hash)) { 160 $ret = $dbfile eq $conf->{ "focr_db_hash"} ? $dbm->{score} : $dbm->{match};159 $ret = $dbfile eq $conf->{focr_db_hash} ? $dbm->{score} : $dbm->{match}; 161 160 $txt = 'Approx'; $dinfo = $dbm->{dinfo} || ''; 162 161 debuglog("Found in: <$dbfile>"); … … 166 165 $dbm->{check} = $now - 1 unless defined $dbm->{check}; 167 166 if ($dbm->{check} < $then) { 168 debuglog("Expiring <$k> older than $conf->{ 'focr_db_max_days'} days");167 debuglog("Expiring <$k> older than $conf->{focr_db_max_days} days"); 169 168 delete $DB{$k}; 170 169 } … … 173 172 if ($ret>0) { 174 173 $dbm->{match}++; 175 if ($dbfile eq $conf->{ "focr_db_hash"}) {174 if ($dbfile eq $conf->{focr_db_hash}) { 176 175 $ret = sprintf("%0.3f",$dbm->{score}); 177 176 debuglog("Found Score <$ret> for $txt Image Hash"); … … 184 183 return ($ret,$dinfo); 185 184 } 186 elsif ($conf->{ "focr_enable_image_hashing"} == 1) {187 $ret = open HASH, $conf->{ "focr_digest_db"};185 elsif ($conf->{focr_enable_image_hashing} == 1) { 186 $ret = open HASH, $conf->{focr_digest_db}; 188 187 unless($ret) { 189 debuglog("No Image Hash database found at \"$conf->{ 'focr_digest_db'}\", or permissions wrong.");188 debuglog("No Image Hash database found at \"$conf->{focr_digest_db}\", or permissions wrong."); 190 189 return (0,''); 191 190 } … … 225 224 my @data = $ddb->selectrow_array($sql); 226 225 unless (scalar(@data)) { 227 $sql = "insert into $db.$table values ("; 228 $sql .= "'$key','$img','$_[3]','$_[4]',"; 229 $sql .= sprintf ("'%d','%d','%d','%d','%d','%s')", 230 defined($_[5]) ? $_[5] : 0, 231 $table eq $conf->{focr_mysql_hash} ? 0 : 1, 232 time,time,$score, 233 defined($_[6]) ? $_[6] : '' 234 ); 226 $sql = 227 "insert into $db.$table values ('$key','$img','$_[3]','$_[4]','" . 228 defined($_[5]) ? $_[5] : 0 . "','" . 229 $table eq $conf->{focr_mysql_hash} ? 0 : 1 . "','" . 230 time . "','" . time . "','" . $score . "'.'" . 231 defined($_[6]) ? $_[6] : '' . "')"; 235 232 debuglog($sql); 236 233 $ddb->do($sql); 237 234 } else { 238 235 debuglog("Hash already in $db.$table skipping..."); 239 } 240 } 241 } 242 elsif ($conf->{"focr_enable_image_hashing"} == 2) { 243 my $dbfile = $_[2] || $conf->{"focr_db_hash"}; 236 debuglog("Key: $key"); 237 } 238 } 239 } 240 elsif ($conf->{focr_enable_image_hashing} == 2) { 241 my $dbfile = $_[2] || $conf->{focr_db_hash}; 244 242 my %DB = (); 245 243 tie %DB, 'MLDBM', $dbfile or $ret++; … … 260 258 $dbm->{input} = 261 259 $dbm->{check} = time; 262 $dbm->{match} = $dbfile eq $conf->{ "focr_db_hash"} ? 0 : 1;260 $dbm->{match} = $dbfile eq $conf->{focr_db_hash} ? 0 : 1; 263 261 $DB{$key} = $dbm; 264 262 } 265 263 untie %DB; 266 264 } 267 elsif ($conf->{ "focr_enable_image_hashing"} == 1) {268 if (-e $conf->{ "focr_digest_db"}) {269 $ret = open DB, ">>$conf->{ 'focr_digest_db'}";265 elsif ($conf->{focr_enable_image_hashing} == 1) { 266 if (-e $conf->{focr_digest_db}) { 267 $ret = open DB, ">>$conf->{focr_digest_db}"; 270 268 } else { 271 $ret = open DB, ">$conf->{ 'focr_digest_db'}";269 $ret = open DB, ">$conf->{focr_digest_db}"; 272 270 } 273 271 unless ($ret) { 274 debuglog("Unable to open/create Image Hash database at \"$conf->{ 'focr_digest_db'}\", check permissions.");272 debuglog("Unable to open/create Image Hash database at \"$conf->{focr_digest_db}\", check permissions."); 275 273 return; 276 274 } 277 debuglog("Adding Hash to \"$conf->{ 'focr_digest_db'}\"");275 debuglog("Adding Hash to \"$conf->{focr_digest_db}\""); 278 276 flock( DB, LOCK_EX ); 279 277 seek( DB, 0, 2 ); … … 290 288 my %Threshold = %$thresref; 291 289 my $pfile = $_[0]; 290 my $pic = $_[1]; 292 291 my ($rcode, $hash); 293 292 294 foreach my $a (qw/p amfile ppmhist/) {293 foreach my $a (qw/ppmhist/) { #pamfile 295 294 unless (defined $conf->{"focr_bin_$a"}) { 296 295 info("FuzzyOcr: calc_image_hash cannot exec $a"); … … 305 304 306 305 my @stdout_data; 307 my ($w,$h,$s,$t) = (0,0,0,0); 308 $t = Mail::SpamAssassin::Timeout->new({ secs => $conf->{"focr_timeout"} }); 306 my $t = Mail::SpamAssassin::Timeout->new({ secs => $conf->{focr_timeout} }); 309 307 $rcode = $t->run_and_catch(sub { 310 @stdout_data = qx($conf->{ "focr_bin_pamfile"}$pfile 2>/dev/null);308 @stdout_data = qx($conf->{focr_bin_ppmhist} -noheader $pfile 2>/dev/null); 311 309 }); 312 310 if ($rcode) { 313 311 chomp $rcode; 314 debuglog("$conf->{'focr_bin_pamfile'}: Timed out [$rcode], skipping..."); 315 return (1, ''); 316 } 317 if ($stdout_data[0] =~ m/(\d+) by (\d+)/) { 318 $w = $1; $h = $2; 319 $s = (stat($pfile))[7]; 320 } 321 322 $rcode = $t->run_and_catch(sub { 323 @stdout_data = qx($conf->{'focr_bin_ppmhist'} -noheader $pfile 2>/dev/null); 324 }); 325 if ($rcode) { 326 chomp $rcode; 327 debuglog("$conf->{'focr_bin_ppmhist'}: Timed out [$rcode], skipping..."); 312 debuglog("$conf->{focr_bin_ppmhist}: Timed out [$rcode], skipping..."); 328 313 return (1, ''); 329 314 } 330 315 my $cnt = 0; 331 316 my $c = scalar(@stdout_data); 332 $hash = sprintf "%d:%d:%d:%d",$s,$h,$w,$c; 317 $hash = sprintf "%d:%d:%d:%d", 318 defined $pic->{fsize} ? $pic->{fsize} : 0, 319 defined $pic->{height} ? $pic->{height} : 0, 320 defined $pic->{width} ? $pic->{width} : 0, 321 $c; 333 322 if ($Threshold{max_hash}) { 334 323 foreach (@stdout_data) { … … 346 335 347 336 sub fmt_time { 348 my $when = time -$_[0];337 my $when = $_[0]; 349 338 my $ret; 350 339 340 debuglog("fmt_time: When = $when"); 351 341 if ($when>86400) { 352 342 my $d = int($when/86400); … … 357 347 my $h = int($when/3600); 358 348 $when -= $h*3600; 359 $ret .= " $h h ours";349 $ret .= " $h hrs."; 360 350 } 361 351 if ($when>60) { 362 352 my $m = int($when/60); 363 353 $when -= $m*60; 364 $ret .= " $m min utes";354 $ret .= " $m min."; 365 355 } 366 356 if ($when>0) { 367 $ret .= " $when sec onds";357 $ret .= " $when sec."; 368 358 } 369 359 $ret .= " ago";
