Changeset 16

Show
Ignore:
Timestamp:
28.10.2006 13:09:24 (2 years ago)
Author:
decoder
Message:

Added maximum file sizes for all formats (untested code)
Changed config file accordingly

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/devel/FuzzyOcr.cf

    r12 r16  
    7171# Timeout for the plugin, in seconds. (Maximum runtime of the plugin) (Default value: 10) 
    7272#focr_timeout 15 
     73# 
     74# Maximum file size for each format, in bytes; larger pictures will not be scanned (Default values: Unlimited) 
     75#focr_max_size_gif 80000 
     76#focr_max_size_jpeg 100000 
     77#focr_max_size_png 80000 
     78#focr_max_size_bmp 500000 
     79#focr_max_size_tiff 500000 
    7380# 
    7481# Default detection threshold (see manual) (Default value: 0.3) (Can be changed on a per-word basis in the wordlist). 
  • trunk/devel/FuzzyOcr.pm

    r15 r16  
    1515 
    1616use String::Approx 'adistr'; 
    17 #use Image::Magick; 
    1817use MLDBM qw(DB_File Storable); 
    1918use FileHandle; 
     
    3837 
    3938our @pgm_opts = qw/personal_wordlist global_wordlist logfile 
    40     threshold counts_required verbose timeout  
    41     db_hash db_safe db_max_days path_bin scansets keep_bad_images 
    42     score_ham enable_image_hashing digest_db hashing_learn_scanned/; 
     39    threshold counts_required verbose timeout max_size_gif max_size_jpeg 
     40    max_size_tiff max_size_bmp db_hash db_safe db_max_days path_bin  
     41    scansets keep_bad_images score_ham enable_image_hashing digest_db 
     42    hashing_learn_scanned/; 
    4343 
    4444our @paths = qw(/usr/local/netpbm/bin /usr/local/bin /usr/bin); 
     
    5050$Option{timeout} = 10; 
    5151$Option{logfile} = undef; 
     52#We should define these max_size values once we have good default values 
     53$Option{max_size_gif} = undef; 
     54$Option{max_size_jpeg} = undef; 
     55$Option{max_size_png} = undef; 
     56$Option{max_size_tiff} = undef; 
     57$Option{max_size_bmp} = undef; 
    5258$Option{enable_image_hashing} = 0; 
    5359$Option{hashing_learn_scanned} = 1; 
     
    839845            debuglog("Found GIF header name=\"$$pic{fname}\""); 
    840846            $ptype = 1; 
     847            my $s = (stat($file))[7]; 
     848            if (defined($Option{max_size_gif}) and ($s > $Option{max_size_gif})) { 
     849                debuglog("GIF file size ($s) exceeds maximum file size for this format, skipping..."); 
     850                next; 
     851            } 
     852 
    841853            if ( $$pic{ctype} !~ /gif/i ) { 
    842854                wrong_ctype( "GIF", $$pic{ctype} ); 
     
    982994            debuglog("Found JPEG header name=\"$$pic{fname}\""); 
    983995            $ptype = 2; 
     996            my $s = (stat($file))[7]; 
     997 
     998            if (defined($Option{max_size_jpeg}) and ($s > $Option{max_size_jpeg})) { 
     999                debuglog("JPEG file size ($s) exceeds maximum file size for this format, skipping..."); 
     1000                next; 
     1001            } 
    9841002            if ( $$pic{ctype} !~ /(jpeg|jpg)/i ) { 
    9851003                wrong_ctype( "JPEG", $$pic{ctype} ); 
     
    10041022            debuglog("Found PNG header name=\"$$pic{fname}\""); 
    10051023            $ptype = 3; 
     1024            my $s = (stat($file))[7]; 
     1025 
     1026            if (defined($Option{max_size_png}) and ($s > $Option{max_size_png})) { 
     1027                debuglog("PNG file size ($s) exceeds maximum file size for this format, skipping..."); 
     1028                next; 
     1029            } 
    10061030            if ( $$pic{ctype} !~ /png/i ) { 
    10071031                wrong_ctype( "PNG", $$pic{ctype} ); 
     
    10271051            debuglog("Found BMP header name=\"$$pic{fname}\""); 
    10281052            $ptype = 4; 
     1053            my $s = (stat($file))[7]; 
     1054            if (defined($Option{max_size_bmp}) and ($s > $Option{max_size_bmp})) { 
     1055                debuglog("BMP file size ($s) exceeds maximum file size for this format, skipping..."); 
     1056                next; 
     1057            } 
    10291058            if ( $$pic{ctype} !~ /bmp/i ) { 
    10301059                wrong_ctype( "BMP", $$pic{ctype} ); 
     
    10531082            debuglog("Found TIFF header name=\"$$pic{fname}\""); 
    10541083            $ptype = 5; 
     1084            my $s = (stat($file))[7]; 
     1085            if (defined($Option{max_size_tiff}) and ($s > $Option{max_size_tiff})) { 
     1086                debuglog("TIFF file size ($s) exceeds maximum file size for this format, skipping..."); 
     1087                next; 
     1088            } 
    10551089            if ( $$pic{ctype} !~ /tiff/i ) { 
    10561090                wrong_ctype( "TIFF", $$pic{ctype} );