Changeset 108
- Timestamp:
- 11.12.2006 17:38:57 (2 years ago)
- Files:
-
- trunk/devel/FuzzyOcr.cf (modified) (1 diff)
- trunk/devel/FuzzyOcr.pm (modified) (19 diffs)
- trunk/devel/FuzzyOcr/Config.pm (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/devel/FuzzyOcr.cf
r107 r108 33 33 # Default value: 1 34 34 #focr_verbose 3 35 36 # Log Message-Id, From, To 37 # Default: 1 38 #focr_log_pmsinfo 0 35 39 36 40 # Send logging output to stderr.
trunk/devel/FuzzyOcr.pm
r104 r108 125 125 my $main = $self->{main}; 126 126 127 my $from = $pms->get('From') ? $pms->get('From') : "<no sender>";128 my $to = $pms->get('To') ? $pms->get('To') : "<no receipients>";129 my $msgid = $pms->get('Message-Id') ? $pms->get('Message-Id') : "<no messageid>";130 131 chomp($from, $to, $msgid);132 133 127 debuglog("Starting FuzzyOcr..."); 134 infolog("Processing Message with ID \"$msgid\" ($from -> $to)"); 128 129 #Show PMS info if asked to 130 unless ($conf->{focr_log_pmsinfo}) { 131 my $msgid = $pms->get('Message-Id') ? $pms->get('Message-Id') : "<no messageid>"; 132 my $from = $pms->get('From') ? $pms->get('From') : "<no sender>"; 133 my $to = $pms->get('To') ? $pms->get('To') : "<no receipients>"; 134 chomp($from, $to, $msgid); 135 infolog("Processing Message with ID \"$msgid\" ($from -> $to)"); 136 } 137 135 138 foreach my $p ( 136 139 $pms->{msg}->find_parts(qr(^image\b)i), … … 152 155 my $w = 0; my $h = 0; 153 156 154 my $blah = substr($pdata,0,3);155 156 157 if ( substr($pdata,0,3) eq "\x47\x49\x46" ) { 157 158 ## GIF File 158 159 $imgfiles{$filename}{ftype} = 1; 159 160 ($w,$h) = unpack("vv",substr($pdata,6,4)); 160 infolog("GIF: [${h}x${w}] $filename ");161 infolog("GIF: [${h}x${w}] $filename ($pdatalen)"); 161 162 $imgfiles{$filename}{width} = $w; 162 163 $imgfiles{$filename}{height} = $h; … … 185 186 } else { 186 187 ($h,$w) = unpack("nn",substr($pdata,$pos+3,4)); 187 infolog("JPEG: [${h}x${w}] $filename ");188 infolog("JPEG: [${h}x${w}] $filename ($pdatalen)"); 188 189 $imgfiles{$filename}{ftype} = 2; 189 190 $imgfiles{$filename}{height} = $h; … … 196 197 $imgfiles{$filename}{width} = $w; 197 198 $imgfiles{$filename}{height} = $h; 198 infolog("PNG: [${h}x${w}] $filename ");199 infolog("PNG: [${h}x${w}] $filename ($pdatalen)"); 199 200 } elsif ( substr($pdata,0,2) eq "BM" ) { 200 201 ## BMP File 201 ($w,$h) = unpack(" NN",substr($pdata,18,8));202 ($w,$h) = unpack("VV",substr($pdata,18,8)); 202 203 $imgfiles{$filename}{ftype} = 4; 203 204 
$imgfiles{$filename}{width} = $w; 204 205 $imgfiles{$filename}{height} = $h; 205 infolog("BMP: [${h}x${w}] $filename ");206 infolog("BMP: [${h}x${w}] $filename ($pdatalen)"); 206 207 } elsif ( 207 208 ## TIFF File … … 219 220 last if ($h != 0 and $w != 0); 220 221 } 221 infolog("TIFF: [${h}x${w}] $filename ($ worder)");222 infolog("TIFF: [${h}x${w}] $filename ($pdatalen) ($worder)"); 222 223 infolog("Cannot determine size of TIFF image, setting to '1x1'") if ($h == 0 and $w == 0); 223 224 $imgfiles{$filename}{ftype} = 5; … … 231 232 delete $imgfiles{$filename}; 232 233 next; 233 234 } 235 236 #Skip images that cannot contain text 237 if ($imgfiles{$filename}{height} < 4) { 238 infolog("Skipping narrow image"); 239 delete $imgfiles{$filename}; 240 next; 241 } 242 243 #Skip images that cannot contain text 244 if ($imgfiles{$filename}{width} < 4) { 245 infolog("Skipping flat image"); 246 delete $imgfiles{$filename}; 247 next; 234 248 } 235 249 … … 309 323 } 310 324 311 IMAGE:312 325 my $haserr; 313 326 foreach my $filename (keys %imgfiles) { … … 352 365 if ($conf->{focr_skip_gif}) { 353 366 infolog("Skipping image check"); 354 next IMAGE;367 next; 355 368 } 356 369 if (defined($conf->{focr_max_size_gif}) and ($$pic{fsize} > $conf->{focr_max_size_gif})) { … … 375 388 unless (defined $conf->{"focr_bin_$a"}) { 376 389 errorlog("Cannot exec $a, skipping image"); 377 next IMAGE; 378 } 379 } 390 next; 391 } 392 } 393 380 394 my @stderr_data; 381 382 395 my ($retcode, @stdout_data) = save_execute( 383 396 "$conf->{focr_bin_giftext} $file", … … 504 517 if ($conf->{focr_skip_jpeg}) { 505 518 infolog("Skipping image check"); 506 next IMAGE;519 next; 507 520 } 508 521 … … 524 537 unless (defined $conf->{"focr_bin_$a"}) { 525 538 errorlog("Cannot exec $a, skipping image"); 526 next IMAGE;539 next; 527 540 } 528 541 } … … 546 559 if ($conf->{focr_skip_png}) { 547 560 infolog("Skipping image check"); 548 next IMAGE;561 next; 549 562 } 550 563 if (defined($conf->{focr_max_size_png}) 
and ($$pic{fsize} > $conf->{focr__max_size_png})) { … … 563 576 unless (defined $conf->{"focr_bin_$a"}) { 564 577 errorlog("Cannot exec $a, skipping image"); 565 next IMAGE;578 next; 566 579 } 567 580 } … … 586 599 if ($conf->{focr_skip_bmp}) { 587 600 infolog("Skipping image check"); 588 next IMAGE;601 next; 589 602 } 590 603 if (defined($conf->{focr_max_size_bmp}) and ($$pic{fsize} > $conf->{focr_max_size_bmp})) { … … 603 616 unless (defined $conf->{"focr_bin_$a"}) { 604 617 errorlog("Cannot exec $a, skipping image"); 605 next IMAGE;618 next; 606 619 } 607 620 } … … 625 638 if ($conf->{focr_skip_tiff}) { 626 639 infolog("Skipping image check"); 627 next IMAGE;640 next; 628 641 } 629 642 if (defined($conf->{focr_max_size_tiff}) and ($$pic{fsize} > $conf->{focr_max_size_tiff})) { … … 643 656 unless (defined $conf->{"focr_bin_$a"}) { 644 657 errorlog("Cannot exec $a, skipping image"); 645 next IMAGE;658 next; 646 659 } 647 660 } … … 689 702 if ($score > 0) { 690 703 infolog("Image in KNOWN_GOOD. Skipping OCR checks..."); 691 next IMAGE;704 next; 692 705 } 693 706 } 694 707 if ($digest eq '') { 695 708 infolog("Empty Hash, skipping..."); 696 next IMAGE;709 next; 697 710 } 698 711 } else { … … 833 846 } 834 847 } 835 infolog("Resorting scanset list...");848 my $newlist; 836 849 @$scansets = sort { $b->{hit_counter} <=> $a->{hit_counter} } @$scansets; 850 foreach my $s (@$scansets) { 851 $newlist .= ' ' . $s->{label} . '(' . $s->{hit_counter} . ')'; 852 } 853 infolog("New sorted list:$newlist"); 854 837 855 } 838 856 last; trunk/devel/FuzzyOcr/Config.pm
r107 r108 241 241 push (@cmds, { 242 242 setting => 'focr_log_stderr', 243 default => 1, 244 type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL 245 }); 246 247 push (@cmds, { 248 setting => 'focr_log_pmsinfo', 243 249 default => 1, 244 250 type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL
