Changeset 112

Show
Ignore:
Timestamp:
12.12.2006 16:46:56 (2 years ago)
Author:
jorge
Message:

Added: focr_min_height (default:4) Images with height < focr_min_height will be skipped.
Added: focr_min_width (default:4) Images with width < focr_min_width will be skipped.
Fixed: the pattern used to generate simple filenames, which was producing "util: cannot untaint path" errors.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/devel/FuzzyOcr.cf

    r108 r112  
    168168#focr_global_timeout 1 
    169169 
     170# Minimum image size to scan. Images with dimensions smaller than the 
     171# ones specified here will be skipped: 
     172# Default: Height:4 Width:4 
     173#focr_min_height 4 
     174#focr_min_width 4 
     175 
    170176# Maximum file size for different formats in bytes; bigger pictures  
    171177# will not be scanned  
  • trunk/devel/FuzzyOcr.pm

    r111 r112  
    150150        } 
    151151 
    152         my $filename = $fname; $filename =~ tr{a-zA-Z0-9\.}{_}cs; 
     152        my $filename = $fname; $filename =~ tr{a-zA-Z0-9\-.}{_}cs; 
     153        debuglog("fname: \"$fname\" => \"$filename\""); 
    153154        my $pdata = $p->decode(); 
    154155        my $pdatalen = length($pdata); 
     
    235236 
    236237        #Skip images that cannot contain text 
    237         if ($imgfiles{$filename}{height} < 4) { 
    238             infolog("Skipping narrow image"); 
     238        if ($imgfiles{$filename}{height} < $conf->{focr_min_height}) { 
     239            infolog("Skipping image: height < $conf->{focr_min_height}"); 
    239240            delete $imgfiles{$filename}; 
    240241            next; 
     
    242243 
    243244        #Skip images that cannot contain text 
    244         if ($imgfiles{$filename}{width} < 4) { 
    245             infolog("Skipping flat image"); 
     245        if ($imgfiles{$filename}{width} < $conf->{focr_min_width}) { 
     246            infolog("Skipping image: width < $conf->{focr_min_width}"); 
    246247            delete $imgfiles{$filename}; 
    247248            next; 
  • trunk/devel/FuzzyOcr/Config.pm

    r110 r112  
    195195            setting => 'focr_threshold_'.$t, 
    196196            default => 5, 
     197            type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC 
     198        }); 
     199    } 
     200    foreach my $t (qw/height width/) { 
     201        push (@cmds, { 
     202            setting => 'focr_min_'.$t, 
     203            default => 4, 
    197204            type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC 
    198205        });