Changeset 16
- Timestamp:
- 28.10.2006 13:09:24 (2 years ago)
- Files:
-
- trunk/devel/FuzzyOcr.cf (modified) (1 diff)
- trunk/devel/FuzzyOcr.pm (modified) (8 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/devel/FuzzyOcr.cf
r12 r16 71 71 # Timeout for the plugin, in seconds. (Maximum runtime of the plugin) (Default value: 10) 72 72 #focr_timeout 15 73 # 74 # Maximum file size for different formats in bytes; bigger pictures will not be scanned (Default values: Unlimited) 75 #focr_max_size_gif 80000 76 #focr_max_size_jpeg 100000 77 #focr_max_size_png 80000 78 #focr_max_size_bmp 500000 79 #focr_max_size_tiff 500000 73 80 # 74 81 # Default detection threshold (see manual) (Default value: 0.3) (Can be changed on a per word basis in the wordlist). trunk/devel/FuzzyOcr.pm
r15 r16 15 15 16 16 use String::Approx 'adistr'; 17 #use Image::Magick;18 17 use MLDBM qw(DB_File Storable); 19 18 use FileHandle; … … 38 37 39 38 our @pgm_opts = qw/personal_wordlist global_wordlist logfile 40 threshold counts_required verbose timeout 41 db_hash db_safe db_max_days path_bin scansets keep_bad_images 42 score_ham enable_image_hashing digest_db hashing_learn_scanned/; 39 threshold counts_required verbose timeout max_size_gif max_size_jpeg 40 max_size_tiff max_size_bmp db_hash db_safe db_max_days path_bin 41 scansets keep_bad_images score_ham enable_image_hashing digest_db 42 hashing_learn_scanned/; 43 43 44 44 our @paths = qw(/usr/local/netpbm/bin /usr/local/bin /usr/bin); … … 50 50 $Option{timeout} = 10; 51 51 $Option{logfile} = undef; 52 #We should define these max_size values once we have good default values 53 $Option{max_size_gif} = undef; 54 $Option{max_size_jpeg} = undef; 55 $Option{max_size_png} = undef; 56 $Option{max_size_tiff} = undef; 57 $Option{max_size_bmp} = undef; 52 58 $Option{enable_image_hashing} = 0; 53 59 $Option{hashing_learn_scanned} = 1; … … 839 845 debuglog("Found GIF header name=\"$$pic{fname}\""); 840 846 $ptype = 1; 847 my $s = (stat($file))[7]; 848 if (defined($Option{max_size_gif}) and ($s > $Option{max_size_gif})) { 849 debuglog("GIF file size ($s) exceeds maximum file size for this format, skipping..."); 850 next; 851 } 852 841 853 if ( $$pic{ctype} !~ /gif/i ) { 842 854 wrong_ctype( "GIF", $$pic{ctype} ); … … 982 994 debuglog("Found JPEG header name=\"$$pic{fname}\""); 983 995 $ptype = 2; 996 my $s = (stat($file))[7]; 997 998 if (defined($Option{max_size_jpeg}) and ($s > $Option{max_size_jpeg})) { 999 debuglog("JPEG file size ($s) exceeds maximum file size for this format, skipping..."); 1000 next; 1001 } 984 1002 if ( $$pic{ctype} !~ /(jpeg|jpg)/i ) { 985 1003 wrong_ctype( "JPEG", $$pic{ctype} ); … … 1004 1022 debuglog("Found PNG header name=\"$$pic{fname}\""); 1005 1023 $ptype = 3; 1024 my $s = (stat($file))[7]; 
1025 1026 if (defined($Option{max_size_png}) and ($s > $Option{max_size_png})) { 1027 debuglog("PNG file size ($s) exceeds maximum file size for this format, skipping..."); 1028 next; 1029 } 1006 1030 if ( $$pic{ctype} !~ /png/i ) { 1007 1031 wrong_ctype( "PNG", $$pic{ctype} ); … … 1027 1051 debuglog("Found BMP header name=\"$$pic{fname}\""); 1028 1052 $ptype = 4; 1053 my $s = (stat($file))[7]; 1054 if (defined($Option{max_size_bmp}) and ($s > $Option{max_size_bmp})) { 1055 debuglog("BMP file size ($s) exceeds maximum file size for this format, skipping..."); 1056 next; 1057 } 1029 1058 if ( $$pic{ctype} !~ /bmp/i ) { 1030 1059 wrong_ctype( "BMP", $$pic{ctype} ); … … 1053 1082 debuglog("Found TIFF header name=\"$$pic{fname}\""); 1054 1083 $ptype = 5; 1084 my $s = (stat($file))[7]; 1085 if (defined($Option{max_size_tiff}) and ($s > $Option{max_size_tiff})) { 1086 debuglog("TIFF file size ($s) exceeds maximum file size for this format, skipping..."); 1087 next; 1088 } 1055 1089 if ( $$pic{ctype} !~ /tiff/i ) { 1056 1090 wrong_ctype( "TIFF", $$pic{ctype} );
