root/tags/FuzzyOcr-3.5.0-rc1/FuzzyOcr.pm

Revision 105, 36.3 kB (checked in by decoder, 2 years ago)

Added FuzzyOcr? 3.5.0-rc1 tag

Line 
1 # FuzzyOcr plugin, version 3.4
2 #
3 # written by Christian Holler (decoder_at_own-hero_dot_net)
4 #   and Jorge Valdes (jorge_at_joval_dot_info)
5
6 package FuzzyOcr;
7
8 use strict;
9 use warnings;
10 use Mail::SpamAssassin;
11 use Mail::SpamAssassin::Logger;
12 use Mail::SpamAssassin::Util;
13 use Mail::SpamAssassin::Timeout;
14 use Mail::SpamAssassin::Plugin;
15
16 use Time::HiRes qw( gettimeofday tv_interval );
17 use String::Approx 'adistr';
18 use FileHandle;
19 use Fcntl ':flock';
20 use POSIX;
21
22 use lib qw(/etc/mail/spamassassin); # Allow placing of FuzzyOcr in siteconfigdir
23
24 use FuzzyOcr::Logging qw(debuglog errorlog warnlog infolog);
25 use FuzzyOcr::Config qw(kill_pid
26     get_tmpdir
27     set_tmpdir
28     get_all_tmpdirs
29     get_pms
30     save_pms
31     get_timeout
32     get_mysql_ddb
33     get_scansets
34     get_wordlist
35     set_config
36     get_config
37     parse_config
38     finish_parsing_end
39     read_words);
40 use FuzzyOcr::Hashing qw(check_image_hash_db add_image_hash_db calc_image_hash);
41 use FuzzyOcr::Deanimate qw(deanimate);
42 use FuzzyOcr::Scoring qw(wrong_ctype wrong_extension corrupt_img known_img_hash);
43 use FuzzyOcr::Misc qw(max removedir removedirs save_execute);
44
45 our @ISA = qw(Mail::SpamAssassin::Plugin);
46
47 # constructor: register the eval rule
48 sub new {
49     my ( $class, $mailsa ) = @_;
50     $class = ref($class) || $class;
51     my $self = $class->SUPER::new($mailsa);
52     bless( $self, $class );
53     $self->register_eval_rule("fuzzyocr_check");
54     $self->register_eval_rule("dummy_check");
55     $self->set_config($mailsa->{conf});
56     return $self;
57 }
58
59 sub dummy_check {
60     return 0;
61 }
62
63 sub fuzzyocr_check {
64     my ( $self, $pms ) = @_;
65     my $conf = get_config();
66
67     save_pms($pms);
68
69     my $end;
70     my $begin = [gettimeofday];
71     if ($conf->{focr_global_timeout}) {
72         my $t = get_timeout();
73         debuglog("Global Timeout set at ".$conf->{focr_timeout}." sec.");
74         $t->run(sub {
75             $end = fuzzyocr_do( $self, $conf, $pms );
76         });
77         if ($t->timed_out()) {
78             infolog("Scan timed out after $conf->{focr_timeout} seconds.");
79             infolog("Killing possibly running pid...");
80             my ($ret, $pid) = kill_pid();
81             if ($ret > 0) {
82                     infolog("Successfully killed PID $pid");
83             } elsif ($ret < 0) {
84                 infolog("No processes left... exiting");
85             } else {
86                 infolog("Failed to kill PID $pid, stale process!");
87             }
88             infolog("Removing possibly leftover tempdirs...");
89             removedirs(get_all_tmpdirs());
90             return 0;
91         }
92     } else {
93         $end = fuzzyocr_do( $self, $conf, $pms );
94     }
95     debuglog("Processed in ".
96         sprintf("%.6f",tv_interval($begin, [gettimeofday]))
97         ." sec.");
98     return $end;
99 }
100
101 sub fuzzyocr_do {
102     my ( $self, $conf, $pms ) = @_;
103
104     my $internal_score = 0;
105     my $current_score = $pms->get_score();
106     my $score = $conf->{focr_autodisable_score} || 100;
107
108     if ( $current_score > $score ) {
109         infolog("Scan canceled, message has already more than $score points ($current_score).");
110         return 0;
111     }
112
113     my $nscore = $conf->{focr_autodiable_negative_score} || -100;
114     if ( $current_score < $nscore ) {
115         infolog("Scan canceled, message has less than $nscore points ($current_score).");
116         return 0;
117     }
118
119     my $imgdir;
120     my %imgfiles = ();
121     my @found    = ();
122     my @hashes   = ();
123     my $cnt      = 0;
124     my $imgerr   = 0;
125     my $main     = $self->{main};
126
127     my $from = $pms->get('From') ? $pms->get('From') : "<no sender>";
128     my $to = $pms->get('To') ? $pms->get('To') : "<no receipients>";
129     my $msgid = $pms->get('Message-Id') ? $pms->get('Message-Id') : "<no messageid>";
130
131     chomp($from, $to, $msgid);
132
133     debuglog("Starting FuzzyOcr...");
134     infolog("Processing Message with ID \"$msgid\" ($from -> $to)");
135     foreach my $p (
136         $pms->{msg}->find_parts(qr(^image\b)i),
137         $pms->{msg}->find_parts(qr(Application/Octet-Stream)i)
138     ) {
139         my $ctype = $p->{'type'};
140         my $fname = $p->{'name'} || 'unknown';
141         if (($fname eq 'unknown') and
142             (defined $p->{'headers'}->{'content-id'})
143             ){
144             $fname = join('',@{$p->{'headers'}->{'content-id'}});
145             $fname =~ s/[<>]//g;
146             $fname =~ tr/\@\$\%\&/_/s;
147         }
148
149         my $filename = $fname; $filename =~ tr{a-zA-Z0-9\.}{_}cs;
150         my $pdata = $p->decode();
151         my $pdatalen = length($pdata);
152         my $w = 0; my $h = 0;
153
154         my $blah = substr($pdata,0,3);
155
156         if ( substr($pdata,0,3) eq "\x47\x49\x46" ) {
157             ## GIF File
158             $imgfiles{$filename}{ftype} = 1;
159             ($w,$h) = unpack("vv",substr($pdata,6,4));
160             infolog("GIF: [${h}x${w}] $filename");
161             $imgfiles{$filename}{width}  = $w;
162             $imgfiles{$filename}{height} = $h;
163         } elsif ( substr($pdata,0,2) eq "\xff\xd8" ) {
164             ## JPEG File
165             my @Markers = (0xC0,0xC1,0xC2,0xC3,0xC5,0xC6,0xC7,0xC9,0xCA,0xCB,0xCD,0xCE,0xCF);
166             my $pos = 2;
167             while ($pos < $pdatalen) {
168                 my ($b,$m) = unpack("CC",substr($pdata,$pos,2)); $pos += 2;
169                 if ($b != 0xff) {
170                    infolog("Invalid JPEG image");
171                    $pos = $pdatalen + 1;
172                    last;
173                 }
174                 my $skip = 0;
175                 foreach my $mm (@Markers) {
176                     if ($mm == $m) {
177                         $skip++; last;
178                     }
179                 }
180                 last if ($skip);
181                 $pos += unpack("n",substr($pdata,$pos,2));
182             }
183             if ($pos > $pdatalen) {
184                 errorlog("Cannot find image dimensions");
185             } else {
186                 ($h,$w) = unpack("nn",substr($pdata,$pos+3,4));
187                 infolog("JPEG: [${h}x${w}] $filename");
188                 $imgfiles{$filename}{ftype} = 2;
189                 $imgfiles{$filename}{height} = $h;
190                 $imgfiles{$filename}{width}  = $w;
191             }
192         } elsif ( substr($pdata,0,4) eq "\x89\x50\x4e\x47" ) {
193             # PNG File
194             ($w,$h) = unpack("NN",substr($pdata,16,8));
195             $imgfiles{$filename}{ftype}  = 3;
196             $imgfiles{$filename}{width}  = $w;
197             $imgfiles{$filename}{height} = $h;
198             infolog("PNG: [${h}x${w}] $filename");
199         } elsif ( substr($pdata,0,2) eq "BM" ) {
200             ## BMP File
201             ($w,$h) = unpack("NN",substr($pdata,18,8));
202             $imgfiles{$filename}{ftype}  = 4;
203             $imgfiles{$filename}{width}  = $w;
204             $imgfiles{$filename}{height} = $h;
205             infolog("BMP: [${h}x${w}] $filename");
206         } elsif (
207             ## TIFF File
208             (substr($pdata,0,4) eq "\x4d\x4d\x00\x2a") or
209             (substr($pdata,0,4) eq "\x49\x49\x2a\x00")
210                 ) {
211             my $worder = (substr($pdata,0,2) eq "\x4d\x4d") ? 0 : 1;
212             my $offset = unpack($worder?"V":"N",substr($pdata,4,4));
213             my $number = unpack($worder?"v":"n",substr($pdata,$offset,2)) - 1;
214             foreach my $n (0 .. $number) {
215                 my $add = 2 + ($n * 12);
216                 my ($id,$tag,$cnt,$val)  = unpack($worder?"vvVV":"nnNN",substr($pdata,$offset+$add,12));
217                 $h = $val if ($id == 256);
218                 $w = $val if ($id == 257);
219                 last if ($h != 0 and $w != 0);
220             }
221             infolog("TIFF: [${h}x${w}] $filename ($worder)");
222             infolog("Cannot determine size of TIFF image, setting to '1x1'") if ($h == 0 and $w == 0);
223             $imgfiles{$filename}{ftype}  = 5;
224             $imgfiles{$filename}{width}  = $w ? $w : 1;
225             $imgfiles{$filename}{height} = $h ? $h : 1;
226         }
227
228         #Skip unless we found the right header
229         unless (defined $imgfiles{$filename}{ftype}) {
230             infolog("Skipping file with content-type=\"$ctype\" name=\"$fname\"");
231             delete $imgfiles{$filename};
232             next;
233
234         }
235
236         #Found Image!! Get a temporary dir to save image
237         $imgdir = Mail::SpamAssassin::Util::secure_tmpdir();
238         unless ($imgdir) {
239             errorlog("Scan canceled, cannot create Image TMPDIR.");
240             return 0;
241         }
242         set_tmpdir($imgdir);
243
244         #Generete unique filename to store image
245         my $imgfilename = Mail::SpamAssassin::Util::untaint_file_path(
246             $imgdir . "/" . $fname
247         );
248         my $unique = 0;
249         while (-e $imgfilename) {
250             $imgfilename = Mail::SpamAssassin::Util::untaint_file_path(
251                 $imgdir . "/" . chr(65+$unique) . "." . $fname
252             );
253             $unique++;
254         }
255
256         #Save important constants
257         $imgfiles{$filename}{fname} = $fname;
258         $imgfiles{$filename}{ctype} = $ctype;
259         $imgfiles{$filename}{fsize} = $pdatalen;
260         $imgfiles{$filename}{fpath} = $imgfilename;
261
262         #Save Image to disk.
263         unless (open PICT, ">$imgfilename") {
264             errorlog("Cannot write \"$imgfilename\", skipping...");
265             delete $imgfiles{$filename};
266             removedir($imgdir);
267             next;
268         }
269         binmode PICT;
270         print PICT $pdata;
271         close PICT;
272         debuglog("Saved: $imgfilename");
273
274         #Increment valid image file counter
275         $cnt++;
276
277         #keep raw email for debugging later
278         my $rawfilename = $imgdir . "/raw.eml";
279         if (open RAW, ">$rawfilename") {
280             print RAW $pms->{msg}->get_pristine();
281             close RAW;
282             debuglog("Saved: $rawfilename");
283         }
284
285     }
286
287     if ($cnt == 0) {
288         debuglog("Skipping OCR, no image files found...");
289         return 0;
290     }
291     infolog("Found: $cnt images"); $cnt = 0;
292     if ($conf->{focr_enable_image_hashing} == 3) {
293         $conf->{focr_mysql_ddb} = get_mysql_ddb();
294     }
295
296     # Try to load personal wordlist
297     if ($conf->{focr_personal_wordlist} =~ m/^\//) {
298         read_words( $conf->{focr_personal_wordlist} );
299     } else {
300         my $peruserlist = $main->sed_path($conf->{focr_personal_wordlist});
301         if ( -r $peruserlist ) {
302             read_words( $peruserlist );
303         } else {
304             # Only complain if the file exists
305             if ( -e $peruserlist ) {
306                 errorlog("Cannot read personal_wordlist: $peruserlist, skipping...");
307             }
308         }
309     }
310
311     IMAGE:
312     my $haserr;
313     foreach my $filename (keys %imgfiles) {
314         my $pic = $imgfiles{$filename};
315         #infolog("Analyzing file with content-type=\"$$pic{ctype}\"");
316         my @used_scansets = ();
317         my $corrupt = 0;
318         my $suffix = 0;
319         my $generic_ctype = 0;
320         my $digest;
321         my $file  = $$pic{fpath};
322         my $tfile = $file;
323         my $pfile = $file . ".pnm";
324         my $efile = $file . ".err";
325         debuglog("pfile => $pfile");
326         debuglog("efile => $efile");
327
328         #Open ERRORLOG
329         $haserr = $Mail::SpamAssassin::Logger::LOG_SA{level} == 3;
330
331         if ($haserr) {
332             $haserr = open RAWERR, ">$imgdir/raw.err";
333             debuglog("Errors to: $imgdir/raw.err") if ($haserr>0);
334         }
335
336         my $mimetype = $$pic{ctype};
337         if($mimetype =~ m'application/octet-stream'i) {
338             $generic_ctype = 1;
339         }
340
341         if($$pic{fname} =~ /\.([\w-]+)$/) {
342             $suffix = $1;
343         }
344         if ($suffix) {
345             debuglog("File has Content-Type \"$mimetype\" and File Extension \"$suffix\"");
346         } else {
347             debuglog("File has Content-Type \"$mimetype\" and no File Extension");
348         }
349
350         if ( $$pic{ftype} == 1 ) {
351             infolog("Found GIF header name=\"$$pic{fname}\"");
352             if ($conf->{focr_skip_gif}) {
353                 infolog("Skipping image check");
354                 next IMAGE;
355             }
356             if (defined($conf->{focr_max_size_gif}) and ($$pic{fsize} > $conf->{focr_max_size_gif})) {
357                 infolog("GIF file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
358                 next;
359             }
360
361             if ( ($$pic{ctype} !~ /gif/i) and not $generic_ctype) {
362                 wrong_ctype( "GIF", $$pic{ctype} );
363                 $internal_score += $conf->{'focr_wrongctype_score'};
364             }
365
366             if ( $suffix and $suffix !~ /gif/i) {
367                 wrong_extension( "GIF", $suffix);
368                 $internal_score += $conf->{'focr_wrongext_score'};
369             }
370
371             my $interlaced_gif = 0;
372             my $image_count = 0;
373
374             foreach my $a (qw/gifsicle giftext giffix gifinter giftopnm/) {
375                 unless (defined $conf->{"focr_bin_$a"}) {
376                     errorlog("Cannot exec $a, skipping image");
377                     next IMAGE;
378                 }
379             }
380             my @stderr_data;
381
382             my ($retcode, @stdout_data) = save_execute(
383                 "$conf->{focr_bin_giftext} $file",
384                 undef,
385                 ">$imgdir/giftext.info",
386                 ">>$imgdir/giftext.err", 1);
387
388             if ($retcode<0) { # only care if we timed out
389                 chomp $retcode;
390                 errorlog("$conf->{focr_bin_giftext} Timed out [$retcode], skipping...");
391                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
392             }
393
394             foreach (@stdout_data) {
395                 unless ($interlaced_gif) {
396                     if ( $_ =~ /Image is Interlaced/i ) {
397                         $interlaced_gif = 1;
398                     }
399                 }
400                 if ( $_ =~ /^Image #/ ) {
401                     $image_count++;
402                 }
403             }
404             if ($interlaced_gif or ($image_count > 1)) {
405                 infolog("Image is interlaced or animated...");
406             }
407             else {
408                 infolog("Image is single non-interlaced...");
409                 $tfile .= "-fixed.gif";
410                 printf RAWERR "## $conf->{focr_bin_giffix} $file >$tfile 2>>$efile\n" if ($haserr>0);
411
412                 $retcode = save_execute("$conf->{focr_bin_giffix} $file", undef, ">$tfile", ">>$efile");
413
414                 if ($retcode<0) { # only care if we timed out
415                     chomp $retcode;
416                     errorlog("$conf->{focr_bin_giffix}: Timed out [$retcode], skipping...");
417                     printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
418                     ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
419                 }
420
421                 if (open ERR, $efile) {
422                     @stderr_data = <ERR>;
423                     close ERR;
424                     foreach (@stderr_data) {
425                         if ( $_ =~ /GIF-LIB error/i ) {
426                             $corrupt = $_;
427                             last;
428                         }
429                     }
430                 }
431             }
432
433             if (defined($conf->{focr_max_size_gif}) and (((stat($tfile))[7]) > $conf->{focr_max_size_gif})) {
434                 infolog("Fixed GIF file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
435                 next;
436             }
437
438             if ($corrupt) {
439                 if ($interlaced_gif or ($image_count > 1)) {
440                     infolog("Skipping corrupted interlaced image...");
441                     corrupt_img($conf->{focr_corrupt_unfixable_score}, $corrupt);
442                     $internal_score += $conf->{focr_corrupt_unfixable_score};
443                     next;
444                 }
445                 if (-z $tfile) {
446                     infolog("Uncorrectable corruption detected, skipping non-interlaced image...");
447                     corrupt_img($conf->{focr_corrupt_unfixable_score}, $corrupt);
448                     $internal_score += $conf->{focr_corrupt_unfixable_score};
449                     next;
450                 }
451                 infolog("Image is corrupt, but seems fixable, continuing...");
452                 corrupt_img($conf->{focr_corrupt_score}, $corrupt);
453                 $internal_score += $conf->{focr_corrupt_score};
454             }
455
456             if ($image_count > 1) {
457                 infolog("File contains <$image_count> images, deanimating...");
458                 $tfile = deanimate($tfile);
459             }
460
461             if ($interlaced_gif) {
462                 infolog("Processing interlaced_gif $tfile...");
463                 my $cfile = $tfile;
464                 if ($tfile =~ m/\.gif$/i) {
465                     $tfile =~ s/\.gif$/-fixed.gif/i;
466                 } else {
467                     $tfile .= ".gif";
468                 }
469                 printf RAWERR qq(## $conf->{focr_bin_gifinter} $cfile >$tfile 2>>$efile\n) if ($haserr>0);
470         
471                 $retcode = save_execute("$conf->{focr_bin_gifinter} $cfile", undef, ">$tfile", ">>$efile");
472
473                 if ($retcode<0) {
474                     chomp $retcode;
475                     printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
476                     errorlog("$conf->{focr_bin_gifinter}: Timed out [$retcode], skipping...");
477                     ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
478                 } elsif ($retcode>0) {
479                     chomp $retcode;
480                     printf RAWERR "?? [$retcode] returned from $conf->{focr_bin_gifinter}\n" if ($haserr>0);
481                     errorlog("$conf->{focr_bin_gifinter}: Returned [$retcode], skipping...");
482                     ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
483                 }
484             }
485
486             printf RAWERR qq(## $conf->{focr_bin_giftopnm} $tfile >$pfile 2>>$efile\n) if ($haserr>0);
487
488             $retcode = save_execute("$conf->{focr_bin_giftopnm} $tfile", undef, ">$pfile", ">>$efile");
489
490             if ($retcode<0) {
491                 chomp $retcode;
492                 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
493                 errorlog("$conf->{focr_bin_giftopnm}: Timed out [$retcode], skipping...");
494                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
495             } elsif ($retcode>0) {
496                 chomp $retcode;
497                 printf RAWERR "?? [$retcode] returned from $conf->{focr_bin_giftopnm}\n" if ($haserr>0);
498                 errorlog("$conf->{focr_bin_giftopnm}: Returned [$retcode], skipping...");
499                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
500             }
501         }
502         elsif ( $$pic{ftype} == 2 ) {
503             infolog("Found JPEG header name=\"$$pic{fname}\"");
504             if ($conf->{focr_skip_jpeg}) {
505                 infolog("Skipping image check");
506                 next IMAGE;
507             }
508
509             if (defined($conf->{focr_max_size_jpeg}) and ($$pic{fsize} > $conf->{focr_max_size_jpeg})) {
510                 infolog("JPEG file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
511                 next;
512             }
513             if ( ($$pic{ctype} !~ /(jpeg|jpg)/i) and not $generic_ctype) {
514                 wrong_ctype( "JPEG", $$pic{ctype} );
515                 $internal_score += $conf->{'focr_wrongctype_score'};
516             }
517
518             if ( $suffix and $suffix !~ /(jpeg|jpg|jfif)/i) {
519                 wrong_extension( "JPEG", $suffix);
520                 $internal_score += $conf->{'focr_wrongext_score'};
521             }
522
523             foreach my $a (qw/jpegtopnm/) {
524                 unless (defined $conf->{"focr_bin_$a"}) {
525                     errorlog("Cannot exec $a, skipping image");
526                     next IMAGE;
527                 }
528             }
529             printf RAWERR qq(## $conf->{focr_bin_jpegtopnm} $file >$pfile 2>>$efile\n) if ($haserr>0);
530             my $retcode = save_execute("$conf->{focr_bin_jpegtopnm} $file", undef, ">$pfile", ">>$efile");
531
532             if ($retcode<0) {
533                 chomp $retcode;
534                 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
535                 errorlog("$conf->{focr_bin_jpegtopnm}: Timed out [$retcode], skipping...");
536                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
537             } elsif ($retcode>0) {
538                 chomp $retcode;
539                 printf RAWERR "?? [$retcode] returned from $conf->{focr_bin_jpegtopnm}\n" if ($haserr>0);
540                 errorlog("$conf->{focr_bin_jpegtopnm}: Returned [$retcode], skipping...");
541                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
542             }
543         }
544         elsif ( $$pic{ftype} == 3 ) {
545             infolog("Found PNG header name=\"$$pic{fname}\"");
546             if ($conf->{focr_skip_png}) {
547                 infolog("Skipping image check");
548                 next IMAGE;
549             }
550             if (defined($conf->{focr_max_size_png}) and ($$pic{fsize} > $conf->{focr__max_size_png})) {
551                 infolog("PNG file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
552                 next;
553             }
554             if ( ($$pic{ctype} !~ /png/i) and not $generic_ctype) {
555                 wrong_ctype( "PNG", $$pic{ctype} );
556                 $internal_score += $conf->{'focr_wrongctype_score'};
557             }
558             if ( $suffix and $suffix !~ /(png)/i) {
559                 wrong_extension( "PNG", $suffix);
560                 $internal_score += $conf->{'focr_wrongext_score'};
561             }
562             foreach my $a (qw/pngtopnm/) {
563                 unless (defined $conf->{"focr_bin_$a"}) {
564                     errorlog("Cannot exec $a, skipping image");
565                     next IMAGE;
566                 }
567             }
568
569             printf RAWERR qq(## $conf->{focr_bin_pngtopnm} $file >$pfile 2>>$efile\n) if ($haserr>0);
570             my $retcode = save_execute("$conf->{focr_bin_pngtopnm} $file", undef, ">$pfile", ">>$efile");
571
572             if ($retcode<0) {
573                 chomp $retcode;
574                 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
575                 errorlog("$conf->{focr_bin_pngtopnm}: Timed out [$retcode], skipping...");
576                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
577             } elsif ($retcode>0) {
578                 chomp $retcode;
579                 printf RAWERR "?? [$retcode] returned from $conf->{focr_bin_pngtopnm}\n" if ($haserr>0);
580                 errorlog("$conf->{focr_bin_pngtopnm}: Returned [$retcode], skipping...");
581                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
582             }
583         }
584         elsif ( $$pic{ftype} == 4 ) {
585             infolog("Found BMP header name=\"$$pic{fname}\"");
586             if ($conf->{focr_skip_bmp}) {
587                 infolog("Skipping image check");
588                 next IMAGE;
589             }
590             if (defined($conf->{focr_max_size_bmp}) and ($$pic{fsize} > $conf->{focr_max_size_bmp})) {
591                 infolog("BMP file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
592                 next;
593             }
594             if ( ($$pic{ctype} !~ /bmp/i) and not $generic_ctype) {
595                 wrong_ctype( "BMP", $$pic{ctype} );
596                 $internal_score += $conf->{'focr_wrongctype_score'};
597             }
598             if ( $suffix and $suffix !~ /(bmp)/i) {
599                 wrong_extension( "BMP", $suffix);
600                 $internal_score += $conf->{'focr_wrongext_score'};
601             }
602             foreach my $a (qw/bmptopnm/) {
603                 unless (defined $conf->{"focr_bin_$a"}) {
604                     errorlog("Cannot exec $a, skipping image");
605                     next IMAGE;
606                 }
607             }
608             printf RAWERR qq(## $conf->{focr_bin_bmptopnm} $file >$pfile 2>>$efile\n) if ($haserr>0);
609
610             my $retcode = save_execute("$conf->{focr_bin_bmptopnm} $file", undef, ">$pfile", ">>$efile");
611             if ($retcode<0) {
612                 chomp $retcode;
613                 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
614                 errorlog("$conf->{focr_bin_bmptopnm}: Timed out [$retcode], skipping...");
615                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
616             } elsif ($retcode>0) {
617                 chomp $retcode;
618                 printf RAWERR "?? [$retcode] returned from $conf->{focr_bin_bmptopnm}\n" if ($haserr>0);
619                 errorlog("$conf->{focr_bin_bmptopnm}: Returned [$retcode], skipping...");
620                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
621             }
622         }
623         elsif ( $$pic{ftype} == 5 ) {
624             infolog("Found TIFF header name=\"$$pic{fname}\"");
625             if ($conf->{focr_skip_tiff}) {
626                 infolog("Skipping image check");
627                 next IMAGE;
628             }
629             if (defined($conf->{focr_max_size_tiff}) and ($$pic{fsize} > $conf->{focr_max_size_tiff})) {
630                 infolog("TIFF file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
631                 next;
632             }
633             if ( ($$pic{ctype} !~ /tif/i) and not $generic_ctype) {
634                 wrong_ctype( "TIFF", $$pic{ctype} );
635                 $internal_score += $conf->{'focr_wrongctype_score'};
636             }
637             if ( $suffix and $suffix !~ /tif/i) {
638                 wrong_extension( "TIFF", $suffix);
639                 $internal_score += $conf->{'focr_wrongext_score'};
640             }
641
642             foreach my $a (qw/tifftopnm/) {
643                 unless (defined $conf->{"focr_bin_$a"}) {
644                     errorlog("Cannot exec $a, skipping image");
645                     next IMAGE;
646                 }
647             }
648             printf RAWERR qq(## $conf->{focr_bin_tifftopnm} $file >$pfile 2>>$efile\n) if ($haserr>0);
649             my $retcode = save_execute("$conf->{focr_bin_tifftopnm} $file", undef, ">$pfile", ">>$efile");
650
651             if ($retcode<0) {
652                 chomp $retcode;
653                 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
654                 errorlog("$conf->{focr_bin_tifftopnm}: Timed out [$retcode], skipping...");
655                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
656             } elsif ($retcode>0) {
657                 chomp $retcode;
658                 printf RAWERR "?? [$retcode] returned from $conf->{focr_bin_tifftopnm}\n" if ($haserr>0);
659                 errorlog("$conf->{focr_bin_tifftopnm}: Returned [$retcode], skipping...");
660                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
661             }
662         }
663         else {
664             errorlog("Image type not recognized, unknown format. Skipping this image...");
665             next;
666         }
667
668         if($conf->{focr_enable_image_hashing}) {
669             infolog("Calculating image hash for: $pfile");
670             ($corrupt, $digest) = calc_image_hash($pfile,$pic);
671             if ($corrupt) {
672                 infolog("Error calculating the image hash, skipping hash check...");
673             } else {
674                 my ($score, $dinfo, $whash);
675                 $whash = $conf->{focr_enable_image_hashing} == 3
676                     ? $conf->{focr_mysql_hash}
677                     : $conf->{focr_db_hash};
678                 ($score,$dinfo) = check_image_hash_db($digest, $whash, $$pic{fname}, $$pic{ctype}, $$pic{ftype});
679                 if ($score > 0) {
680                     known_img_hash($score,$dinfo);
681                     infolog("Message is SPAM. $dinfo") if ($conf->{focr_enable_image_hashing} < 3);
682                     removedirs(get_all_tmpdirs());
683                     return 0;
684                 }
685                 $whash = $conf->{focr_enable_image_hashing} == 3
686                     ? $conf->{focr_mysql_safe}
687                     : $conf->{focr_db_safe};
688                 ($score,$dinfo) = check_image_hash_db($digest, $whash, $$pic{fname}, $$pic{ctype}, $$pic{ftype});
689                 if ($score > 0) {
690                     infolog("Image in KNOWN_GOOD. Skipping OCR checks...");
691                     next IMAGE;
692                 }
693             }
694             if ($digest eq '') {
695                 infolog("Empty Hash, skipping...");
696                 next IMAGE;
697             }
698         } else {
699             infolog("Image hashing disabled in configuration, skipping...");
700         }
701
702         # Note: $current_score is here the score that the message had at the beginning
703         # and $score is the autodisable_score defined in the config
704         # $internal_score describes the score t