root/trunk/devel/FuzzyOcr.pm

Revision 133, 41.8 kB (checked in by decoder, 1 year ago)

Added License tags too all code files

Line 
1 # <@LICENSE>
2 # Licensed to the Apache Software Foundation (ASF) under one or more
3 # contributor license agreements.  See the NOTICE file distributed with
4 # this work for additional information regarding copyright ownership.
5 # The ASF licenses this file to you under the Apache License, Version 2.0
6 # (the "License"); you may not use this file except in compliance with
7 # the License.  You may obtain a copy of the License at:
8 #
9 #     http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 # </@LICENSE>
17
18 # FuzzyOcr plugin, version 3.4
19 #
20 # written by Christian Holler (decoder_at_own-hero_dot_net)
21 #   and Jorge Valdes (jorge_at_joval_dot_info)
22
23 package FuzzyOcr;
24
25 use strict;
26 use warnings;
27 use POSIX;
28 use Fcntl ':flock';
29 use Mail::SpamAssassin;
30 use Mail::SpamAssassin::Logger;
31 use Mail::SpamAssassin::Util;
32 use Mail::SpamAssassin::Timeout;
33 use Mail::SpamAssassin::Plugin;
34
35 use Time::HiRes qw( gettimeofday tv_interval );
36 use String::Approx 'adistr';
37 use FileHandle;
38
39 use lib qw(/etc/mail/spamassassin); # Allow placing of FuzzyOcr in siteconfigdir
40
41 use FuzzyOcr::Logging qw(debuglog errorlog warnlog infolog);
42 use FuzzyOcr::Config qw(kill_pid
43     get_tmpdir
44     set_tmpdir
45     get_all_tmpdirs
46     get_pms
47     save_pms
48     get_timeout
49     get_mysql_ddb
50     get_scansets
51     get_wordlist
52     set_config
53     get_config
54     parse_config
55     finish_parsing_end
56     read_words);
57 use FuzzyOcr::Hashing qw(check_image_hash_db add_image_hash_db calc_image_hash);
58 use FuzzyOcr::Deanimate qw(deanimate);
59 use FuzzyOcr::Scoring qw(wrong_ctype wrong_extension corrupt_img known_img_hash);
60 use FuzzyOcr::Misc qw(max removedir removedirs save_execute);
61
62 our @ISA = qw(Mail::SpamAssassin::Plugin);
63
64 # constructor: register the eval rule
65 sub new {
66     my ( $class, $mailsa ) = @_;
67     $class = ref($class) || $class;
68     my $self = $class->SUPER::new($mailsa);
69     bless( $self, $class );
70     $self->register_eval_rule("fuzzyocr_check");
71     $self->register_eval_rule("dummy_check");
72     $self->set_config($mailsa->{conf});
73     return $self;
74 }
75
76 sub dummy_check {
77     return 0;
78 }
79
80 sub fuzzyocr_check {
81     my ( $self, $pms ) = @_;
82     my $conf = get_config();
83
84     save_pms($pms);
85
86     my $end;
87     my $begin = [gettimeofday];
88     if ($conf->{focr_global_timeout}) {
89         my $t = get_timeout();
90         debuglog("Global Timeout set at ".$conf->{focr_timeout}." sec.");
91         $t->run(sub {
92             $end = fuzzyocr_do( $self, $conf, $pms );
93         });
94         if ($t->timed_out()) {
95             infolog("Scan timed out after $conf->{focr_timeout} seconds.");
96             infolog("Killing possibly running pid...");
97             my ($ret, $pid) = kill_pid();
98             if ($ret > 0) {
99                     infolog("Successfully killed PID $pid");
100             } elsif ($ret < 0) {
101                 infolog("No processes left... exiting");
102             } else {
103                 infolog("Failed to kill PID $pid, stale process!");
104             }
105             infolog("Removing possibly leftover tempdirs...");
106             removedirs(get_all_tmpdirs());
107             return 0;
108         }
109     } else {
110         $end = fuzzyocr_do( $self, $conf, $pms );
111     }
112     debuglog("Processed in ".
113         sprintf("%.6f",tv_interval($begin, [gettimeofday]))
114         ." sec.");
115     return $end;
116 }
117
118 sub fuzzyocr_do {
119     my ( $self, $conf, $pms ) = @_;
120
121     my $internal_score = 0;
122     my $current_score = $pms->get_score();
123     my $score = $conf->{focr_autodisable_score} || 100;
124
125     if ( $current_score > $score ) {
126         infolog("Scan canceled, message has already more than $score points ($current_score).");
127         return 0;
128     }
129
130     my $nscore = $conf->{focr_autodisable_negative_score} || -100;
131     if ( $current_score < $nscore ) {
132         infolog("Scan canceled, message has less than $nscore points ($current_score).");
133         return 0;
134     }
135
136     my $imgdir;
137     my %imgfiles = ();
138     my @found    = ();
139     my @hashes   = ();
140     my $cnt      = 0;
141     my $imgerr   = 0;
142     my $main     = $self->{main};
143
144     debuglog("Starting FuzzyOcr...");
145    
146     #Show PMS info if asked to
147     if ($conf->{focr_log_pmsinfo}) {
148         my $msgid = $pms->get('Message-Id') ? $pms->get('Message-Id') : "<no messageid>";
149         my $from = $pms->get('From') ? $pms->get('From') : "<no sender>";
150         my $to = $pms->get('To') ? $pms->get('To') : "<no receipients>";
151         chomp($from, $to, $msgid);
152         infolog("Processing Message with ID \"$msgid\" ($from -> $to)");
153     }
154
155     foreach my $p (
156         $pms->{msg}->find_parts(qr(^image\b)i),
157         $pms->{msg}->find_parts(qr(Application/Octet-Stream)i),
158         $pms->{msg}->find_parts(qr(application/pdf)i)
159     ) {
160         my $ctype = $p->{'type'};
161         my $fname = $p->{'name'} || 'unknown';
162         if (($fname eq 'unknown') and
163             (defined $p->{'headers'}->{'content-id'})
164             ){
165             $fname = join('',@{$p->{'headers'}->{'content-id'}});
166             $fname =~ s/[<>]//g;
167             $fname =~ tr/\@\$\%\&/_/s;
168         }
169
170         my $filename = $fname; $filename =~ tr{a-zA-Z0-9\-.}{_}cs;
171         debuglog("fname: \"$fname\" => \"$filename\"");
172         my $pdata = $p->decode();
173         my $pdatalen = length($pdata);
174         my $w = 0; my $h = 0;
175
176         if ( substr($pdata,0,3) eq "\x47\x49\x46" ) {
177             ## GIF File
178             $imgfiles{$filename}{ftype} = 1;
179             ($w,$h) = unpack("vv",substr($pdata,6,4));
180             infolog("GIF: [${h}x${w}] $filename ($pdatalen)");
181             $imgfiles{$filename}{width}  = $w;
182             $imgfiles{$filename}{height} = $h;
183         } elsif ( substr($pdata,0,2) eq "\xff\xd8" ) {
184             ## JPEG File
185             my @Markers = (0xC0,0xC1,0xC2,0xC3,0xC5,0xC6,0xC7,0xC9,0xCA,0xCB,0xCD,0xCE,0xCF);
186             my $pos = 2;
187             while ($pos < $pdatalen) {
188                 my ($b,$m) = unpack("CC",substr($pdata,$pos,2)); $pos += 2;
189                 if ($b != 0xff) {
190                    infolog("Invalid JPEG image");
191                    $pos = $pdatalen + 1;
192                    last;
193                 }
194                 my $skip = 0;
195                 foreach my $mm (@Markers) {
196                     if ($mm == $m) {
197                         $skip++; last;
198                     }
199                 }
200                 last if ($skip);
201                 $pos += unpack("n",substr($pdata,$pos,2));
202             }
203             if ($pos > $pdatalen) {
204                 errorlog("Cannot find image dimensions");
205             } else {
206                 ($h,$w) = unpack("nn",substr($pdata,$pos+3,4));
207                 infolog("JPEG: [${h}x${w}] $filename ($pdatalen)");
208                 $imgfiles{$filename}{ftype} = 2;
209                 $imgfiles{$filename}{height} = $h;
210                 $imgfiles{$filename}{width}  = $w;
211             }
212         } elsif ( substr($pdata,0,4) eq "\x89\x50\x4e\x47" ) {
213             # PNG File
214             ($w,$h) = unpack("NN",substr($pdata,16,8));
215             $imgfiles{$filename}{ftype}  = 3;
216             $imgfiles{$filename}{width}  = $w;
217             $imgfiles{$filename}{height} = $h;
218             infolog("PNG: [${h}x${w}] $filename ($pdatalen)");
219         } elsif ( substr($pdata,0,2) eq "BM" ) {
220             ## BMP File
221             ($w,$h) = unpack("VV",substr($pdata,18,8));
222             $imgfiles{$filename}{ftype}  = 4;
223             $imgfiles{$filename}{width}  = $w;
224             $imgfiles{$filename}{height} = $h;
225             infolog("BMP: [${h}x${w}] $filename ($pdatalen)");
226         } elsif (
227             ## TIFF File
228             (substr($pdata,0,4) eq "\x4d\x4d\x00\x2a") or
229             (substr($pdata,0,4) eq "\x49\x49\x2a\x00")
230                 ) {
231             my $worder = (substr($pdata,0,2) eq "\x4d\x4d") ? 0 : 1;
232             my $offset = unpack($worder?"V":"N",substr($pdata,4,4));
233             my $number = unpack($worder?"v":"n",substr($pdata,$offset,2)) - 1;
234             foreach my $n (0 .. $number) {
235                 my $add = 2 + ($n * 12);
236                 my ($id,$tag,$cnt,$val)  = unpack($worder?"vvVV":"nnNN",substr($pdata,$offset+$add,12));
237                 $h = $val if ($id == 256);
238                 $w = $val if ($id == 257);
239                 last if ($h != 0 and $w != 0);
240             }
241             infolog("TIFF: [${h}x${w}] $filename ($pdatalen) ($worder)");
242             infolog("Cannot determine size of TIFF image, setting to '1x1'") if ($h == 0 and $w == 0);
243             $imgfiles{$filename}{ftype}  = 5;
244             $imgfiles{$filename}{width}  = $w ? $w : 1;
245             $imgfiles{$filename}{height} = $h ? $h : 1;
246         } elsif (substr($pdata,0,5) eq "\x25\x50\x44\x46\x2d") {
247             my $version = substr($pdata,5,3);
248             infolog("PDF: [version $version] $filename ($pdatalen)");
249             $imgfiles{$filename}{ftype} = 6;
250             $imgfiles{$filename}{version} = $version;
251             $imgfiles{$filename}{width}  = 0;
252             $imgfiles{$filename}{height} = 0;
253         }
254
255         #Skip unless we found the right header
256         unless (defined $imgfiles{$filename}{ftype}) {
257             infolog("Skipping file with content-type=\"$ctype\" name=\"$fname\"");
258             delete $imgfiles{$filename};
259             next;
260         }
261         if ($imgfiles{$filename}{ftype} == 6) {
262                 unless ($conf->{focr_scan_pdfs}) {
263                         infolog("Skipping PDF file: PDF Scanning was disabled in config");
264                         next;
265                 }
266         } else {
267                 #Skip images that cannot contain text
268                 if ($imgfiles{$filename}{height} < $conf->{focr_min_height}) {
269                     infolog("Skipping image: height < $conf->{focr_min_height}");
270                     delete $imgfiles{$filename};
271                     next;
272                 }
273
274                 #Skip images that cannot contain text
275                 if ($imgfiles{$filename}{width} < $conf->{focr_min_width}) {
276                     infolog("Skipping image: width < $conf->{focr_min_width}");
277                     delete $imgfiles{$filename};
278                     next;
279                 }
280
281                 #Skip too big images, screenshots etc
282                 if ($imgfiles{$filename}{height} > $conf->{focr_max_height}) {
283                     infolog("Skipping image: height > $conf->{focr_max_height}");
284                     delete $imgfiles{$filename};
285                     next;
286                 }
287
288                 #Skip too big images, screenshots etc
289                 if ($imgfiles{$filename}{width} > $conf->{focr_max_width}) {
290                     infolog("Skipping image: width > $conf->{focr_max_width}");
291                     delete $imgfiles{$filename};
292                     next;
293                 }
294         }
295         #Found Image!! Get a temporary dir to save image
296         $imgdir = Mail::SpamAssassin::Util::secure_tmpdir();
297         unless ($imgdir) {
298             errorlog("Scan canceled, cannot create Image TMPDIR.");
299             return 0;
300         }
301         set_tmpdir($imgdir);
302
303         #Generete unique filename to store image
304         my $imgfilename = Mail::SpamAssassin::Util::untaint_file_path(
305             $imgdir . "/" . $filename
306         );
307         my $unique = 0;
308         while (-e $imgfilename) {
309             $imgfilename = Mail::SpamAssassin::Util::untaint_file_path(
310                 $imgdir . "/" . chr(65+$unique) . "." . $filename
311             );
312             $unique++;
313         }
314
315         #Save important constants
316         $imgfiles{$filename}{fname} = $fname;
317         $imgfiles{$filename}{ctype} = $ctype;
318         $imgfiles{$filename}{fsize} = $pdatalen;
319         $imgfiles{$filename}{fpath} = $imgfilename;
320
321         #Save Image to disk.
322         unless (open PICT, ">$imgfilename") {
323             errorlog("Cannot write \"$imgfilename\", skipping...");
324             delete $imgfiles{$filename};
325             removedir($imgdir);
326             next;
327         }
328         binmode PICT;
329         print PICT $pdata;
330         close PICT;
331         debuglog("Saved: $imgfilename");
332
333         #Increment valid image file counter
334         $cnt++;
335
336         #keep raw email for debugging later
337         my $rawfilename = $imgdir . "/raw.eml";
338         if (open RAW, ">$rawfilename") {
339             print RAW $pms->{msg}->get_pristine();
340             close RAW;
341             debuglog("Saved: $rawfilename");
342         }
343
344     }
345
346     if ($cnt == 0) {
347         debuglog("Skipping OCR, no image files found...");
348         return 0;
349     }
350     infolog("Found: $cnt images"); $cnt = 0;
351     if ($conf->{focr_enable_image_hashing} == 3) {
352         $conf->{focr_mysql_ddb} = get_mysql_ddb();
353     }
354
355     # Try to load personal wordlist
356     unless ($conf->{focr_no_homedirs}) {
357         if ($conf->{focr_personal_wordlist} =~ m/^\//) {
358             read_words( $conf->{focr_personal_wordlist} );
359         } else {
360             my $peruserlist = $main->sed_path($conf->{focr_personal_wordlist});
361             if ( -r $peruserlist ) {
362                 read_words( $peruserlist );
363             } else {
364                 # Only complain if the file exists
365                 if ( -e $peruserlist ) {
366                     errorlog("Cannot read personal_wordlist: $peruserlist, skipping...");
367                 }
368             }
369         }
370     }
371     my $haserr;
372     foreach my $filename (keys %imgfiles) {
373         my $pic = $imgfiles{$filename};
374         #infolog("Analyzing file with content-type=\"$$pic{ctype}\"");
375         my @used_scansets = ();
376         my $corrupt = 0;
377         my $suffix = 0;
378         my $generic_ctype = 0;
379         my $digest;
380         my $file  = $$pic{fpath};
381         my $tfile = $file;
382         my $pfile = $file . ".pnm";
383         my $efile = $file . ".err";
384         debuglog("pfile => $pfile");
385         debuglog("efile => $efile");
386
387         #Open ERRORLOG
388         $haserr = $Mail::SpamAssassin::Logger::LOG_SA{level} == 3;
389
390         if ($haserr) {
391             $haserr = open RAWERR, ">$imgdir/raw.err";
392             debuglog("Errors to: $imgdir/raw.err") if ($haserr>0);
393         }
394
395         my $mimetype = $$pic{ctype};
396         if($mimetype =~ m'application/octet-stream'i) {
397             $generic_ctype = 1;
398         }
399
400         if($$pic{fname} =~ /\.([\w-]+)$/) {
401             $suffix = $1;
402         }
403         if ($suffix) {
404             debuglog("File has Content-Type \"$mimetype\" and File Extension \"$suffix\"");
405         } else {
406             debuglog("File has Content-Type \"$mimetype\" and no File Extension");
407         }
408
409         if ( $$pic{ftype} == 1 ) {
410             infolog("Found GIF header name=\"$$pic{fname}\"");
411             if ($conf->{focr_skip_gif}) {
412                 infolog("Skipping image check");
413                 next;
414             }
415             if (defined($conf->{focr_max_size_gif}) and ($$pic{fsize} > $conf->{focr_max_size_gif})) {
416                 infolog("GIF file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
417                 next;
418             }
419
420             if ( ($$pic{ctype} !~ /gif/i) and not $generic_ctype) {
421                 wrong_ctype( "GIF", $$pic{ctype} );
422                 $internal_score += $conf->{'focr_wrongctype_score'};
423             }
424
425             if ( $suffix and $suffix !~ /gif/i) {
426                 wrong_extension( "GIF", $suffix);
427                 $internal_score += $conf->{'focr_wrongext_score'};
428             }
429
430             my $interlaced_gif = 0;
431             my $image_count = 0;
432
433             foreach my $a (qw/gifsicle giftext giffix gifinter giftopnm/) {
434                 unless (defined $conf->{"focr_bin_$a"}) {
435                     errorlog("Cannot exec $a, skipping image");
436                     next;
437                 }
438             }
439
440             my @stderr_data;
441             my ($retcode, @stdout_data) = save_execute(
442                 "$conf->{focr_bin_giftext} $file",
443                 undef,
444                 ">$imgdir/giftext.info",
445                 ">>$imgdir/giftext.err", 1);
446
447             if ($retcode<0) { # only care if we timed out
448                 chomp $retcode;
449                 errorlog("$conf->{focr_bin_giftext} Timed out [$retcode], skipping...");
450                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
451             }
452
453             foreach (@stdout_data) {
454                 unless ($interlaced_gif) {
455                     if ( $_ =~ /Image is Interlaced/i ) {
456                         $interlaced_gif = 1;
457                     }
458                 }
459                 if ( $_ =~ /^Image #/ ) {
460                     $image_count++;
461                 }
462             }
463             if ($interlaced_gif or ($image_count > 1)) {
464                 infolog("Image is interlaced or animated...");
465             }
466             else {
467                 infolog("Image is single non-interlaced...");
468                 $tfile .= "-fixed.gif";
469                 printf RAWERR "## $conf->{focr_bin_giffix} $file >$tfile 2>>$efile\n" if ($haserr>0);
470
471                 $retcode = save_execute("$conf->{focr_bin_giffix} $file", undef, ">$tfile", ">>$efile");
472
473                 if ($retcode<0) { # only care if we timed out
474                     chomp $retcode;
475                     errorlog("$conf->{focr_bin_giffix}: Timed out [$retcode], skipping...");
476                     printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
477                     ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
478                 }
479
480                 if (open ERR, $efile) {
481                     @stderr_data = <ERR>;
482                     close ERR;
483                     foreach (@stderr_data) {
484                         if ( $_ =~ /GIF-LIB error/i ) {
485                             $corrupt = $_;
486                             last;
487                         }
488                     }
489                 }
490             }
491             my $fixedsize = (stat($tfile))[7];
492             if (defined($conf->{focr_max_size_gif}) and ($fixedsize > $conf->{focr_max_size_gif})) {
493                 infolog("Fixed GIF file size ($fixedsize) exceeds maximum file size for this format, skipping...");
494                 next;
495             }
496
497             if ($corrupt) {
498                 if ($interlaced_gif or ($image_count > 1)) {
499                     infolog("Skipping corrupted interlaced image...");
500                     corrupt_img($conf->{focr_corrupt_unfixable_score}, $corrupt);
501                     $internal_score += $conf->{focr_corrupt_unfixable_score};
502                     next;
503                 }
504                 if (-z $tfile) {
505                     infolog("Uncorrectable corruption detected, skipping non-interlaced image...");
506                     corrupt_img($conf->{focr_corrupt_unfixable_score}, $corrupt);
507                     $internal_score += $conf->{focr_corrupt_unfixable_score};
508                     next;
509                 }
510                 infolog("Image is corrupt, but seems fixable, continuing...");
511                 corrupt_img($conf->{focr_corrupt_score}, $corrupt);
512                 $internal_score += $conf->{focr_corrupt_score};
513             }
514
515             if ($image_count > 1) {
516                 infolog("File contains <$image_count> images, deanimating...");
517                 $tfile = deanimate($tfile);
518             }
519
520             if ($interlaced_gif) {
521                 infolog("Processing interlaced_gif $tfile...");
522                 my $cfile = $tfile;
523                 if ($tfile =~ m/\.gif$/i) {
524                     $tfile =~ s/\.gif$/-fixed.gif/i;
525                 } else {
526                     $tfile .= ".gif";
527                 }
528                 printf RAWERR qq(## $conf->{focr_bin_gifinter} $cfile >$tfile 2>>$efile\n) if ($haserr>0);
529         
530                 $retcode = save_execute("$conf->{focr_bin_gifinter} $cfile", undef, ">$tfile", ">>$efile");
531
532                 if ($retcode<0) {
533                     chomp $retcode;
534                     printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
535                     errorlog("$conf->{focr_bin_gifinter}: Timed out [$retcode], skipping...");
536                     ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
537                 } elsif ($retcode>0) {
538                     chomp $retcode;
539                     printf RAWERR "?? [$retcode] returned from $conf->{focr_bin_gifinter}\n" if ($haserr>0);
540                     errorlog("$conf->{focr_bin_gifinter}: Returned [$retcode], skipping...");
541                     ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
542                 }
543             }
544
545             printf RAWERR qq(## $conf->{focr_bin_giftopnm} $tfile >$pfile 2>>$efile\n) if ($haserr>0);
546
547             $retcode = save_execute("$conf->{focr_bin_giftopnm} $tfile", undef, ">$pfile", ">>$efile");
548
549             if ($retcode<0) {
550                 chomp $retcode;
551                 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
552                 errorlog("$conf->{focr_bin_giftopnm}: Timed out [$retcode], skipping...");
553                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
554             } elsif ($retcode>0) {
555                 chomp $retcode;
556                 printf RAWERR "?? [$retcode] returned from $conf->{focr_bin_giftopnm}\n" if ($haserr>0);
557                 errorlog("$conf->{focr_bin_giftopnm}: Returned [$retcode], skipping...");
558                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
559             }
560         }
561         elsif ( $$pic{ftype} == 2 ) {
562             infolog("Found JPEG header name=\"$$pic{fname}\"");
563             if ($conf->{focr_skip_jpeg}) {
564                 infolog("Skipping image check");
565                 next;
566             }
567
568             if (defined($conf->{focr_max_size_jpeg}) and ($$pic{fsize} > $conf->{focr_max_size_jpeg})) {
569                 infolog("JPEG file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
570                 next;
571             }
572             if ( ($$pic{ctype} !~ /(jpeg|jpg)/i) and not $generic_ctype) {
573                 wrong_ctype( "JPEG", $$pic{ctype} );
574                 $internal_score += $conf->{'focr_wrongctype_score'};
575             }
576
577             if ( $suffix and $suffix !~ /(jpeg|jpg|jfif)/i) {
578                 wrong_extension( "JPEG", $suffix);
579                 $internal_score += $conf->{'focr_wrongext_score'};
580             }
581
582             foreach my $a (qw/jpegtopnm/) {
583                 unless (defined $conf->{"focr_bin_$a"}) {
584                     errorlog("Cannot exec $a, skipping image");
585                     next;
586                 }
587             }
588             printf RAWERR qq(## $conf->{focr_bin_jpegtopnm} $file >$pfile 2>>$efile\n) if ($haserr>0);
589             my $retcode = save_execute("$conf->{focr_bin_jpegtopnm} $file", undef, ">$pfile", ">>$efile");
590
591             if ($retcode<0) {
592                 chomp $retcode;
593                 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
594                 errorlog("$conf->{focr_bin_jpegtopnm}: Timed out [$retcode], skipping...");
595                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
596             } elsif ($retcode>0) {
597                 chomp $retcode;
598                 printf RAWERR "?? [$retcode] returned from $conf->{focr_bin_jpegtopnm}\n" if ($haserr>0);
599                 errorlog("$conf->{focr_bin_jpegtopnm}: Returned [$retcode], skipping...");
600                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
601             }
602         }
603         elsif ( $$pic{ftype} == 3 ) {
604             infolog("Found PNG header name=\"$$pic{fname}\"");
605             if ($conf->{focr_skip_png}) {
606                 infolog("Skipping image check");
607                 next;
608             }
609             if (defined($conf->{focr_max_size_png}) and ($$pic{fsize} > $conf->{focr_max_size_png})) {
610                 infolog("PNG file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
611                 next;
612             }
613             if ( ($$pic{ctype} !~ /png/i) and not $generic_ctype) {
614                 wrong_ctype( "PNG", $$pic{ctype} );
615                 $internal_score += $conf->{'focr_wrongctype_score'};
616             }
617             if ( $suffix and $suffix !~ /(png)/i) {
618                 wrong_extension( "PNG", $suffix);
619                 $internal_score += $conf->{'focr_wrongext_score'};
620             }
621             foreach my $a (qw/pngtopnm/) {
622                 unless (defined $conf->{"focr_bin_$a"}) {
623                     errorlog("Cannot exec $a, skipping image");
624                     next;
625                 }
626             }
627
628             printf RAWERR qq(## $conf->{focr_bin_pngtopnm} $file >$pfile 2>>$efile\n) if ($haserr>0);
629             my $retcode = save_execute("$conf->{focr_bin_pngtopnm} $file", undef, ">$pfile", ">>$efile");
630
631             if ($retcode<0) {
632                 chomp $retcode;
633                 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
634                 errorlog("$conf->{focr_bin_pngtopnm}: Timed out [$retcode], skipping...");
635                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
636             } elsif ($retcode>0) {
637                 chomp $retcode;
638                 printf RAWERR "?? [$retcode] returned from $conf->{focr_bin_pngtopnm}\n" if ($haserr>0);
639                 errorlog("$conf->{focr_bin_pngtopnm}: Returned [$retcode], skipping...");
640                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
641             }
642         }
643         elsif ( $$pic{ftype} == 4 ) {
644             infolog("Found BMP header name=\"$$pic{fname}\"");
645             if ($conf->{focr_skip_bmp}) {
646                 infolog("Skipping image check");
647                 next;
648             }
649             if (defined($conf->{focr_max_size_bmp}) and ($$pic{fsize} > $conf->{focr_max_size_bmp})) {
650                 infolog("BMP file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
651                 next;
652             }
653             if ( ($$pic{ctype} !~ /bmp/i) and not $generic_ctype) {
654                 wrong_ctype( "BMP", $$pic{ctype} );
655                 $internal_score += $conf->{'focr_wrongctype_score'};
656             }
657             if ( $suffix and $suffix !~ /(bmp)/i) {
658                 wrong_extension( "BMP", $suffix);
659                 $internal_score += $conf->{'focr_wrongext_score'};
660             }
661             foreach my $a (qw/bmptopnm/) {
662                 unless (defined $conf->{"focr_bin_$a"}) {
663                     errorlog("Cannot exec $a, skipping image");
664                     next;
665                 }
666             }
667             printf RAWERR qq(## $conf->{focr_bin_bmptopnm} $file >$pfile 2>>$efile\n) if ($haserr>0);
668
669             my $retcode = save_execute("$conf->{focr_bin_bmptopnm} $file", undef, ">$pfile", ">>$efile");
670             if ($retcode<0) {
671                 chomp $retcode;
672                 printf RAWERR "?? Timed out > $retcode\n" if ($haserr>0);
673                 errorlog("$conf->{focr_bin_bmptopnm}: Timed out [$retcode], skipping...");
674                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
675             } elsif ($retcode>0) {
676                 chomp $retcode;
677                 printf RAWERR "?? [$retcode] returned from $conf->{focr_bin_bmptopnm}\n" if ($haserr>0);
678                 errorlog("$conf->{focr_bin_bmptopnm}: Returned [$retcode], skipping...");
679                 ++$imgerr if $conf->{focr_keep_bad_images}>0; next;
680             }
681         }
682         elsif ( $$pic{ftype} == 5 ) {
683             infolog("Found TIFF header name=\"$$pic{fname}\"");
684             if ($conf->{focr_skip_tiff}) {
685                 infolog("Skipping image check");
686                 next;
687             }
688             if (defined($conf->{focr_max_size_tiff}) and ($$pic{fsize} > $conf->{focr_max_size_tiff})) {
689                 infolog("TIFF file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
690                 next;
691             }
692             if ( ($$pic{ctype} !~ /tif/i) and not $generic_ctype) {
693                 wrong_ctype( "TIFF", $$pic{ctype} );
694                 $internal_score += $conf->{'focr_wrongctype_score'};
695             }
696             if ( $suffix and $suffix !~ /tif/i) {
697                 wrong_extension( "TIFF", $suffix);
698                 $internal_score += $conf->{'focr_wrongext_score'};
699             }
700
701             foreach my $a (qw/tifftopnm/) {
702                 unless (