Changeset 80
- Timestamp: 02.12.2006 21:39:04 (2 years ago)
- Files:
- trunk/devel/FuzzyOcr.cf (modified) (2 diffs)
- trunk/devel/FuzzyOcr.pm (modified) (10 diffs)
- trunk/devel/FuzzyOcr/Config.pm (modified) (1 diff)
- trunk/devel/FuzzyOcr/Scoring.pm (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/devel/FuzzyOcr.cf
r79 r80 4 4 loadplugin FuzzyOcr FuzzyOcr.pm 5 5 6 body FUZZY_OCR eval:fuzzyocr_check() 7 body FUZZY_OCR_WRONG_CTYPE eval:dummy_check() 8 body FUZZY_OCR_CORRUPT_IMG eval:dummy_check() 9 body FUZZY_OCR_KNOWN_HASH eval:dummy_check() 10 11 describe FUZZY_OCR Mail contains an image with common spam text inside 12 describe FUZZY_OCR_WRONG_CTYPE Mail contains an image with wrong content-type set 13 describe FUZZY_OCR_CORRUPT_IMG Mail contains a corrupted image 14 describe FUZZY_OCR_KNOWN_HASH Mail contains an image with known hash 6 body FUZZY_OCR eval:fuzzyocr_check() 7 body FUZZY_OCR_WRONG_CTYPE eval:dummy_check() 8 body FUZZY_OCR_CORRUPT_IMG eval:dummy_check() 9 body FUZZY_OCR_WRONG_EXTENSION eval:dummy_check() 10 body FUZZY_OCR_KNOWN_HASH eval:dummy_check() 11 12 describe FUZZY_OCR Mail contains an image with common spam text inside 13 describe FUZZY_OCR_WRONG_CTYPE Mail contains an image with wrong content-type set 14 describe FUZZY_OCR_WRONG_EXTENSION Mail contains an image with wrong file extension 15 describe FUZZY_OCR_CORRUPT_IMG Mail contains a corrupted image 16 describe FUZZY_OCR_KNOWN_HASH Mail contains an image with known hash 15 17 16 18 priority FUZZY_OCR 900 … … 191 193 #focr_wrongctype_score 1.5 192 194 195 # This is the score to give for a wrong file extension. 196 # e.g. JPEG image but file extension says GIF 197 # Default value: 1.5 198 #focr_wrongext_score 1.5 199 193 200 # This is the score to give for a corrupted image. 194 201 # This currently affects only GIF images trunk/devel/FuzzyOcr.pm
r75 r80 39 39 use FuzzyOcr::Hashing qw(check_image_hash_db add_image_hash_db calc_image_hash); 40 40 use FuzzyOcr::Deanimate qw(deanimate); 41 use FuzzyOcr::Scoring qw(wrong_ctype corrupt_img known_img_hash); 41 use FuzzyOcr::Scoring qw(wrong_ctype wrong_extension corrupt_img known_img_hash); 42 42 use FuzzyOcr::Misc qw(max removedir save_execute); 43 43 … … 137 137 my $test = 0; 138 138 $test++ if ($ctype =~ /image/i); 139 $test++ if ($fname =~ /(gif|jpg|jpeg|png|bmp|tiff )$/i); 139 $test++ if ($fname =~ /(gif|jpg|jpeg|png|bmp|tiff?)$/i); 140 140 141 141 if ($test == 0) { … … 177 177 my $pdatalen = length($pdata); 178 178 my $w = 0; my $h = 0; 179 179 180 if ( substr($pdata,0,3) eq "\x47\x49\x46" ) { 180 181 ## GIF File … … 297 298 my @used_scansets = (); 298 299 my $corrupt = 0; 300 my $suffix = 0; 301 my $generic_ctype = 0; 299 302 my $digest; 300 303 my $tfile = $file; … … 304 307 debuglog("efile => $efile"); 305 308 309 my $mimetype = $$pic{ctype}; 310 if($mimetype =~ m'application/octet-stream'i) { 311 $generic_ctype = 1; 312 } 313 314 if($$pic{fname} =~ /\.([\w-]+)$/) { 315 $suffix = $1; 316 } 317 if ($suffix) { 318 debuglog("File has Content-Type \"$mimetype\" and File Extension \"$suffix\""); 319 } else { 320 debuglog("File has Content-Type \"$mimetype\" and no File Extension"); 321 } 322 306 323 if ( $$pic{ftype} == 1 ) { 307 324 infolog("Found GIF header name=\"$$pic{fname}\""); … … 315 332 } 316 333 317 if ( $$pic{ctype} !~ /gif/i) { 334 if ( ($$pic{ctype} !~ /gif/i) and not $generic_ctype) { 318 335 wrong_ctype( "GIF", $$pic{ctype} ); 319 336 } 337 338 if ( $suffix and $suffix !~ /gif/i) { 339 wrong_extension( "GIF", $suffix); 340 } 341 320 342 my $interlaced_gif = 0; 321 343 my $image_count = 0; … … 452 474 next; 453 475 } 454 if ( $$pic{ctype} !~ /(jpeg|jpg)/i) { 476 if ( ($$pic{ctype} !~ /(jpeg|jpg)/i) and not $generic_ctype) { 455 477 wrong_ctype( "JPEG", $$pic{ctype} ); 456 478 } 479 480 if ( $suffix and $suffix !~ /(jpeg|jpg|jfif)/i) { 481
wrong_extension( "JPEG", $suffix); 482 } 483 457 484 foreach my $a (qw/jpegtopnm/) { 458 485 unless (defined $conf->{"focr_bin_$a"}) { … … 486 513 next; 487 514 } 488 if ( $$pic{ctype} !~ /png/i) { 515 if ( ($$pic{ctype} !~ /png/i) and not $generic_ctype) { 489 516 wrong_ctype( "PNG", $$pic{ctype} ); 517 } 518 if ( $suffix and $suffix !~ /(png)/i) { 519 wrong_extension( "PNG", $suffix); 490 520 } 491 521 foreach my $a (qw/pngtopnm/) { … … 521 551 next; 522 552 } 523 if ( $$pic{ctype} !~ /bmp/i) { 553 if ( ($$pic{ctype} !~ /bmp/i) and not $generic_ctype) { 524 554 wrong_ctype( "BMP", $$pic{ctype} ); 555 } 556 if ( $suffix and $suffix !~ /(bmp)/i) { 557 wrong_extension( "BMP", $suffix); 525 558 } 526 559 foreach my $a (qw/bmptopnm/) { … … 555 588 next; 556 589 } 557 if ( $$pic{ctype} !~ /tiff/i) { 590 if ( ($$pic{ctype} !~ /tif/i) and not $generic_ctype) { 558 591 wrong_ctype( "TIFF", $$pic{ctype} ); 592 } 593 if ( $suffix and $suffix !~ /tif/i) { 594 wrong_extension( "TIFF", $suffix); 559 595 } 560 596 trunk/devel/FuzzyOcr/Config.pm
r79 r80 329 329 push (@cmds, { 330 330 setting => 'focr_wrongctype_score', 331 default => 1.5, 332 type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC 333 }); 334 335 push (@cmds, { 336 setting => 'focr_wrongext_score', 331 337 default => 1.5, 332 338 type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC trunk/devel/FuzzyOcr/Scoring.pm
r70 r80 3 3 4 4 use base 'Exporter'; 5 our @EXPORT_OK = qw(wrong_ctype corrupt_img known_img_hash ); 5 our @EXPORT_OK = qw(wrong_ctype corrupt_img known_img_hash wrong_extension); 6 6 7 7 use lib qw(..); … … 29 29 $conf->{'focr_wrongctype_score'}, "BODY: ", 30 30 $pms->{conf}->{descriptions}->{FUZZY_OCR_WRONG_CTYPE} . "\n$debuginfo" ); 31 } 32 } 33 34 sub wrong_extension { 35 my $conf = get_config(); 36 my $pms = get_pms(); 37 my ( $format, $suffix ) = @_; 38 if ($conf->{'focr_wrongext_score'}) { 39 my $debuginfo = ""; 40 if ( $conf->{"focr_verbose"} > 0 ) { 41 $debuginfo = 42 ("Image has format \"$format\" but file extension is \"$suffix\""); 43 } 44 infolog($debuginfo); 45 my $ws = sprintf( "%0.3f", $conf->{'focr_wrongext_score'} ); 46 for my $set ( 0 .. 3 ) { 47 $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_WRONG_EXTENSION"} = $ws; 48 } 49 $pms->_handle_hit( "FUZZY_OCR_WRONG_EXTENSION", 50 $conf->{'focr_wrongext_score'}, "BODY: ", 51 $pms->{conf}->{descriptions}->{FUZZY_OCR_WRONG_EXTENSION} . "\n$debuginfo" ); 31 52 } 32 53 }
