Index: FuzzyOcr.cf
===================================================================
--- FuzzyOcr.cf	(revision 107)
+++ FuzzyOcr.cf	(working copy)
@@ -33,6 +33,10 @@
 # Default value: 1
 #focr_verbose 3
 
+# Log the Message-Id, From and To headers of each processed message.
+# Default value: 1
+#focr_log_pmsinfo 0
+
 # Send logging output to stderr.
 # Default value: 1
 #focr_log_stderr 0
@@ -163,6 +167,12 @@
 # Default value: 0
 #focr_global_timeout 1
 
+# Minimum image dimensions to scan. Images whose height or width is
+# smaller than the value specified here will be skipped.
+# Default values: height 4, width 4
+#focr_min_height 4
+#focr_min_width 4
+
 # Maximum file size for different formats in byte, bigger pictures 
 # will not be scanned 
 # Default values: Unlimited)
Index: FuzzyOcr.pm
===================================================================
--- FuzzyOcr.pm	(revision 107)
+++ FuzzyOcr.pm	(working copy)
@@ -124,14 +124,17 @@
     my $imgerr   = 0;
     my $main     = $self->{main};
 
-    my $from = $pms->get('From') ? $pms->get('From') : "<no sender>";
-    my $to = $pms->get('To') ? $pms->get('To') : "<no receipients>";
-    my $msgid = $pms->get('Message-Id') ? $pms->get('Message-Id') : "<no messageid>";
+    debuglog("Starting FuzzyOcr...");
+    
+    #Show PMS info if asked to
+    if ($conf->{focr_log_pmsinfo}) {
+        my $msgid = $pms->get('Message-Id') ? $pms->get('Message-Id') : "<no messageid>";
+        my $from = $pms->get('From') ? $pms->get('From') : "<no sender>";
+        my $to = $pms->get('To') ? $pms->get('To') : "<no receipients>";
+        chomp($from, $to, $msgid);
+        infolog("Processing Message with ID \"$msgid\" ($from -> $to)");
+    }
 
-    chomp($from, $to, $msgid);
-
-    debuglog("Starting FuzzyOcr...");
-    infolog("Processing Message with ID \"$msgid\" ($from -> $to)");
     foreach my $p (
         $pms->{msg}->find_parts(qr(^image\b)i),
         $pms->{msg}->find_parts(qr(Application/Octet-Stream)i)
@@ -146,18 +149,17 @@
             $fname =~ tr/\@\$\%\&/_/s;
         }
 
-        my $filename = $fname; $filename =~ tr{a-zA-Z0-9\.}{_}cs;
+        my $filename = $fname; $filename =~ tr{a-zA-Z0-9\-.}{_}cs;
+        debuglog("fname: \"$fname\" => \"$filename\"");
         my $pdata = $p->decode();
         my $pdatalen = length($pdata);
         my $w = 0; my $h = 0;
 
-        my $blah = substr($pdata,0,3);
-
         if ( substr($pdata,0,3) eq "\x47\x49\x46" ) {
             ## GIF File
             $imgfiles{$filename}{ftype} = 1; 
             ($w,$h) = unpack("vv",substr($pdata,6,4));
-            infolog("GIF: [${h}x${w}] $filename");
+            infolog("GIF: [${h}x${w}] $filename ($pdatalen)");
             $imgfiles{$filename}{width}  = $w;
             $imgfiles{$filename}{height} = $h;
         } elsif ( substr($pdata,0,2) eq "\xff\xd8" ) {
@@ -184,7 +186,7 @@
                 errorlog("Cannot find image dimensions");
             } else {
                 ($h,$w) = unpack("nn",substr($pdata,$pos+3,4));
-                infolog("JPEG: [${h}x${w}] $filename");
+                infolog("JPEG: [${h}x${w}] $filename ($pdatalen)");
                 $imgfiles{$filename}{ftype} = 2;
                 $imgfiles{$filename}{height} = $h;
                 $imgfiles{$filename}{width}  = $w;
@@ -195,14 +197,14 @@
             $imgfiles{$filename}{ftype}  = 3;
             $imgfiles{$filename}{width}  = $w;
             $imgfiles{$filename}{height} = $h;
-            infolog("PNG: [${h}x${w}] $filename");
+            infolog("PNG: [${h}x${w}] $filename ($pdatalen)");
         } elsif ( substr($pdata,0,2) eq "BM" ) {
             ## BMP File
-            ($w,$h) = unpack("NN",substr($pdata,18,8));
+            ($w,$h) = unpack("VV",substr($pdata,18,8));
             $imgfiles{$filename}{ftype}  = 4;
             $imgfiles{$filename}{width}  = $w;
             $imgfiles{$filename}{height} = $h;
-            infolog("BMP: [${h}x${w}] $filename");
+            infolog("BMP: [${h}x${w}] $filename ($pdatalen)");
         } elsif (
             ## TIFF File
             (substr($pdata,0,4) eq "\x4d\x4d\x00\x2a") or
@@ -218,7 +220,7 @@
                 $w = $val if ($id == 257);
                 last if ($h != 0 and $w != 0);
             }
-            infolog("TIFF: [${h}x${w}] $filename ($worder)");
+            infolog("TIFF: [${h}x${w}] $filename ($pdatalen) ($worder)");
             infolog("Cannot determine size of TIFF image, setting to '1x1'") if ($h == 0 and $w == 0);
             $imgfiles{$filename}{ftype}  = 5;
             $imgfiles{$filename}{width}  = $w ? $w : 1;
@@ -230,9 +232,22 @@
             infolog("Skipping file with content-type=\"$ctype\" name=\"$fname\"");
             delete $imgfiles{$filename};
             next;
+        }
 
+        #Skip images that cannot contain text
+        if ($imgfiles{$filename}{height} < $conf->{focr_min_height}) {
+            infolog("Skipping image: height < $conf->{focr_min_height}");
+            delete $imgfiles{$filename};
+            next;
         }
 
+        #Skip images that cannot contain text
+        if ($imgfiles{$filename}{width} < $conf->{focr_min_width}) {
+            infolog("Skipping image: width < $conf->{focr_min_width}");
+            delete $imgfiles{$filename};
+            next;
+        }
+
         #Found Image!! Get a temporary dir to save image
         $imgdir = Mail::SpamAssassin::Util::secure_tmpdir();
         unless ($imgdir) {
@@ -243,12 +258,12 @@
 
         #Generete unique filename to store image
         my $imgfilename = Mail::SpamAssassin::Util::untaint_file_path(
-            $imgdir . "/" . $fname
+            $imgdir . "/" . $filename
         );
         my $unique = 0;
         while (-e $imgfilename) {
             $imgfilename = Mail::SpamAssassin::Util::untaint_file_path(
-                $imgdir . "/" . chr(65+$unique) . "." . $fname
+                $imgdir . "/" . chr(65+$unique) . "." . $filename
             );
             $unique++;
         }
@@ -308,7 +323,6 @@
         }
     }
 
-    IMAGE:
     my $haserr;
-    foreach my $filename (keys %imgfiles) {
+    IMAGE: foreach my $filename (keys %imgfiles) {    # label sits on the loop so inner loops can skip the whole image
         my $pic = $imgfiles{$filename};
@@ -351,7 +365,7 @@
             infolog("Found GIF header name=\"$$pic{fname}\"");
             if ($conf->{focr_skip_gif}) {
                 infolog("Skipping image check");
-                next IMAGE;
+                next;
             }
             if (defined($conf->{focr_max_size_gif}) and ($$pic{fsize} > $conf->{focr_max_size_gif})) {
                 infolog("GIF file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
@@ -374,11 +388,11 @@
             foreach my $a (qw/gifsicle giftext giffix gifinter giftopnm/) {
                 unless (defined $conf->{"focr_bin_$a"}) {
                     errorlog("Cannot exec $a, skipping image");
                     next IMAGE;
                 }
             }
+
             my @stderr_data;
-
             my ($retcode, @stdout_data) = save_execute(
                 "$conf->{focr_bin_giftext} $file",
                 undef,
@@ -429,9 +443,9 @@
                     }
                 }
             }
-
-            if (defined($conf->{focr_max_size_gif}) and (((stat($tfile))[7]) > $conf->{focr_max_size_gif})) {
-                infolog("Fixed GIF file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
+            my $fixedsize = (stat($tfile))[7];
+            if (defined($conf->{focr_max_size_gif}) and ($fixedsize > $conf->{focr_max_size_gif})) {
+                infolog("Fixed GIF file size ($fixedsize) exceeds maximum file size for this format, skipping...");
                 next;
             }
 
@@ -503,7 +517,7 @@
             infolog("Found JPEG header name=\"$$pic{fname}\"");
             if ($conf->{focr_skip_jpeg}) {
                 infolog("Skipping image check");
-                next IMAGE;
+                next;
             }
 
             if (defined($conf->{focr_max_size_jpeg}) and ($$pic{fsize} > $conf->{focr_max_size_jpeg})) {
@@ -545,7 +559,7 @@
             infolog("Found PNG header name=\"$$pic{fname}\"");
             if ($conf->{focr_skip_png}) {
                 infolog("Skipping image check");
-                next IMAGE;
+                next;
             }
-            if (defined($conf->{focr_max_size_png}) and ($$pic{fsize} > $conf->{focr__max_size_png})) {
+            if (defined($conf->{focr_max_size_png}) and ($$pic{fsize} > $conf->{focr_max_size_png})) {
                 infolog("PNG file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
@@ -585,7 +599,7 @@
             infolog("Found BMP header name=\"$$pic{fname}\"");
             if ($conf->{focr_skip_bmp}) {
                 infolog("Skipping image check");
-                next IMAGE;
+                next;
             }
             if (defined($conf->{focr_max_size_bmp}) and ($$pic{fsize} > $conf->{focr_max_size_bmp})) {
                 infolog("BMP file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
@@ -624,7 +638,7 @@
             infolog("Found TIFF header name=\"$$pic{fname}\"");
             if ($conf->{focr_skip_tiff}) {
                 infolog("Skipping image check");
-                next IMAGE;
+                next;
             }
             if (defined($conf->{focr_max_size_tiff}) and ($$pic{fsize} > $conf->{focr_max_size_tiff})) {
                 infolog("TIFF file size ($$pic{fsize}) exceeds maximum file size for this format, skipping...");
@@ -688,12 +702,12 @@
                 ($score,$dinfo) = check_image_hash_db($digest, $whash, $$pic{fname}, $$pic{ctype}, $$pic{ftype});
                 if ($score > 0) {
                     infolog("Image in KNOWN_GOOD. Skipping OCR checks...");
-                    next IMAGE;
+                    next;
                 }
             }
             if ($digest eq '') {
                 infolog("Empty Hash, skipping...");
-                next IMAGE;
+                next;
             }
         } else {
             infolog("Image hashing disabled in configuration, skipping...");
@@ -711,6 +725,11 @@
 
         my @ocr_results = ();
         my $scansets = get_scansets();
+        my $newlist = '';
+        foreach my $s (@$scansets) {
+            $newlist .= ' ' . $s->{label} . '(' . $s->{hit_counter} . ')';
+        }
+        infolog("Scanset Order:$newlist");
         my $mcnt = 0;
         my $modus = 0;
         my $modus_match = 0;
@@ -832,8 +851,7 @@
                             }
                         }
                     }
-                    infolog("Resorting scanset list...");
-                    @$scansets = sort { $b->{hit_counter} <=> $a->{hit_counter} } @$scansets;
+
                 }
                 last;
             }
Index: FuzzyOcr/Config.pm
===================================================================
--- FuzzyOcr/Config.pm	(revision 107)
+++ FuzzyOcr/Config.pm	(working copy)
@@ -130,6 +130,9 @@
 }
 
 sub get_scansets {
+    # With focr_autosort_scanset enabled, reorder @scansets in place so that
+    # the entries with the highest hit_counter come first.
+    @scansets = sort { $b->{hit_counter} <=> $a->{hit_counter} } @scansets if $conf->{focr_autosort_scanset};
     return \@scansets;
 }
 
@@ -194,6 +197,13 @@
             type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
         });
     }
+    foreach my $t (qw/height width/) {
+        push (@cmds, {
+            setting => 'focr_min_'.$t,
+            default => 4,
+            type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
+        });
+    }
     push (@cmds, {
         setting => 'focr_threshold',
         default => 0.25,
@@ -245,6 +255,12 @@
     });
 
     push (@cmds, {
+        setting => 'focr_log_pmsinfo',
+        default => 1,
+        type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL
+    });
+
+    push (@cmds, {
         setting => 'focr_enable_image_hashing',
         default => 0,
         code => sub {
Index: FuzzyOcr/Hashing.pm
===================================================================
--- FuzzyOcr/Hashing.pm	(revision 107)
+++ FuzzyOcr/Hashing.pm	(working copy)
@@ -94,15 +94,15 @@
             $dinfo = $data[9] || '';
             if ($data[2] eq '') {
                 infolog("Updating $txt info File-Name:'$fname'");
-                $ddb->do(qq(update $db.$dbfile set $dbfile.fname='$fname' where $dbfile.key='$key'));
+                $ddb->do(qq(update $db.$dbfile set $dbfile.fname=? where $dbfile.key='$key'),undef,$fname);
             }
             if ($data[3] eq '') {
                 infolog("Updating $txt info Content-Type:'$ctype'");
-                $ddb->do(qq(update $db.$dbfile set $dbfile.ctype='$ctype' where $dbfile.key='$key'));
+                $ddb->do(qq(update $db.$dbfile set $dbfile.ctype=? where $dbfile.key='$key'),undef,$ctype);
             }
             if ($data[4] != $ftype) {
                 infolog("Updating $txt info File-Type:'$ftype'");
-                $ddb->do(qq(update $db.$dbfile set $dbfile.ftype='$ftype' where $dbfile.key='$key'));
+                $ddb->do(qq(update $db.$dbfile set $dbfile.ftype=? where $dbfile.key='$key'),undef,$ftype);
             }
         }
         unless ($match) {
@@ -134,8 +134,8 @@
             }
             infolog("Matched [$next] time(s). Prev match: ".fmt_time($now - $when));
             $sql = qq(update $db.$dbfile set $dbfile.match='$next',$dbfile.check='$now' where $dbfile.key='$key');
-            debuglog($sql,2);
             $ddb->do($sql);
+            debuglog($sql);
         }
         return ($ret,$dinfo);
     }
@@ -143,7 +143,7 @@
         use MLDBM qw(DB_File Storable);
         use MLDBM::Sync;
         my %DB = (); my $dbm; my $sdbm;
-        $sdbm = tie %DB, 'MLDBM::Sync', $dbfile, O_RDWR or $ret++;
+        $sdbm = tie %DB, 'MLDBM::Sync', $dbfile, O_CREAT|O_RDWR or $ret++;
         if ($ret>0) {
             warnlog("No Image Hash database found at \"$dbfile\", or permissions wrong.");
             return (0,'');
@@ -242,34 +242,42 @@
                 if ($conf->{focr_mysql_update_hash}) {
                     infolog("Hash already in $db.$table updating...");
                     $sql  = "update $db.$table set ";
-                    $sql .= "basic='$img',"   unless ($data[1] eq $img);
-                    $sql .= "fname='$fname'," unless ($data[2] eq $fname); 
-                    $sql .= "ctype='$ctype'," unless ($data[3] eq $ctype); 
-                    $sql .= "ftype='$ftype'," unless ($data[4] == $ftype);
-                    $sql .= "score='$score'," unless ($data[8] == $score);
-                    $sql .= "dinfo='$dinfo'," unless ($data[9] eq $dinfo);
+                    my @params;
+                    unless ($data[1] eq $img) {
+                        $sql .= "basic=?,"; push @params,$img;
+                    }
+                    unless ($data[2] eq $fname) {
+                        $sql .= "fname=?,"; push @params,$fname;
+                    }
+                    unless ($data[3] eq $ctype) {
+                        $sql .= "ctype=?,"; push @params,$ctype;
+                    }
+                    unless ($data[4] == $ftype) {
+                        $sql .= "ftype=?,"; push @params,$ftype;
+                    }
+                    unless ($data[8] == $score) {
+                        $sql .= "score=?,"; push @params,$score;
+                    }
+                    unless ($data[9] eq $dinfo) {   # dinfo is text, so compare as strings
+                        $sql .= "dinfo=?,"; push @params,$dinfo;
+                    }
                     $sql  =~ s/,$//;
                     $sql .= " where $table.key='$key'";
+                    $ddb->do($sql,undef,@params);
+                    foreach my $p (@params) { $sql =~ s/\?/$p/; }
                     debuglog($sql);
-                    $ddb->do($sql);
                 } else {
                     infolog("Hash already in $db.$table skipping...");
                 }
             } else {
-                $sql =
-                    "insert into $db.$table values ('". $key
-                     . "','" . $img
-                     . "','" . $fname
-                     . "','" . $ctype
-                     . "','" . $ftype
-                     . "','" . ($table eq $conf->{focr_mysql_hash} ? 0 : 1)
-                     . "','" . time
-                     . "','" . time
-                     . "','" . $score
-                     . "','" . $dinfo
-                     . "')";
+                my @params = (
+                    $key, $img, $fname, $ctype, $ftype,
+                    ($table eq $conf->{focr_mysql_hash} ? 0 : 1),
+                    time, time, $score, $dinfo);
+                $sql = "insert into $db.$table values (?,?,?,?,?,?,?,?,?,?)";
+                $ddb->do($sql,undef,@params);
+                foreach my $p (@params) { $sql =~ s/\?/$p/; }
                 debuglog($sql);
-                $ddb->do($sql);
             }
         }
     }
