| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
|
|---|
| 7 |
|
|---|
| 8 |
|
|---|
| 9 |
|
|---|
| 10 |
|
|---|
| 11 |
|
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 |
|
|---|
| 15 |
|
|---|
| 16 |
|
|---|
| 17 |
|
|---|
| 18 |
use strict; |
|---|
| 19 |
package FuzzyOcr::Scoring; |
|---|
| 20 |
|
|---|
| 21 |
use base 'Exporter'; |
|---|
| 22 |
our @EXPORT_OK = qw(wrong_ctype corrupt_img known_img_hash wrong_extension); |
|---|
| 23 |
|
|---|
| 24 |
use lib qw(..); |
|---|
| 25 |
use FuzzyOcr::Config qw(get_pms get_config); |
|---|
| 26 |
use FuzzyOcr::Logging qw(infolog); |
|---|
| 27 |
|
|---|
| 28 |
|
|---|
| 29 |
|
|---|
| 30 |
sub wrong_ctype { |
|---|
| 31 |
my $conf = get_config(); |
|---|
| 32 |
my $pms = get_pms(); |
|---|
| 33 |
my ( $format, $ctype ) = @_; |
|---|
| 34 |
if ($conf->{'focr_wrongctype_score'}) { |
|---|
| 35 |
my $debuginfo = ""; |
|---|
| 36 |
if ( $conf->{"focr_verbose"} > 0 ) { |
|---|
| 37 |
$debuginfo = |
|---|
| 38 |
("Image has format \"$format\" but content-type is \"$ctype\""); |
|---|
| 39 |
} |
|---|
| 40 |
infolog($debuginfo); |
|---|
| 41 |
my $ws = sprintf( "%0.3f", $conf->{'focr_wrongctype_score'} ); |
|---|
| 42 |
for my $set ( 0 .. 3 ) { |
|---|
| 43 |
$pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_WRONG_CTYPE"} = $ws; |
|---|
| 44 |
} |
|---|
| 45 |
my @dinfo = split('\n', $debuginfo); |
|---|
| 46 |
foreach (@dinfo) { |
|---|
| 47 |
$pms->test_log($_); |
|---|
| 48 |
} |
|---|
| 49 |
$pms->_handle_hit( "FUZZY_OCR_WRONG_CTYPE", |
|---|
| 50 |
$conf->{'focr_wrongctype_score'}, "BODY: ", "BODY", |
|---|
| 51 |
$pms->{conf}->get_description_for_rule("FUZZY_OCR_WRONG_CTYPE")); |
|---|
| 52 |
} |
|---|
| 53 |
} |
|---|
| 54 |
|
|---|
| 55 |
sub wrong_extension { |
|---|
| 56 |
my $conf = get_config(); |
|---|
| 57 |
my $pms = get_pms(); |
|---|
| 58 |
my ( $format, $suffix ) = @_; |
|---|
| 59 |
if ($conf->{'focr_wrongext_score'}) { |
|---|
| 60 |
my $debuginfo = ""; |
|---|
| 61 |
if ( $conf->{"focr_verbose"} > 0 ) { |
|---|
| 62 |
$debuginfo = |
|---|
| 63 |
("Image has format \"$format\" but file extension is \"$suffix\""); |
|---|
| 64 |
} |
|---|
| 65 |
infolog($debuginfo); |
|---|
| 66 |
my $ws = sprintf( "%0.3f", $conf->{'focr_wrongext_score'} ); |
|---|
| 67 |
for my $set ( 0 .. 3 ) { |
|---|
| 68 |
$pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_WRONG_EXTENSION"} = $ws; |
|---|
| 69 |
} |
|---|
| 70 |
my @dinfo = split('\n', $debuginfo); |
|---|
| 71 |
foreach (@dinfo) { |
|---|
| 72 |
$pms->test_log($_); |
|---|
| 73 |
} |
|---|
| 74 |
$pms->_handle_hit( "FUZZY_OCR_WRONG_EXTENSION", |
|---|
| 75 |
$conf->{'focr_wrongext_score'}, "BODY: ", "BODY", |
|---|
| 76 |
$pms->{conf}->get_description_for_rule("FUZZY_OCR_WRONG_EXTENSION")); |
|---|
| 77 |
} |
|---|
| 78 |
} |
|---|
| 79 |
|
|---|
| 80 |
sub corrupt_img { |
|---|
| 81 |
my $conf = get_config(); |
|---|
| 82 |
my $pms = get_pms(); |
|---|
| 83 |
my ($score, $err) = @_; |
|---|
| 84 |
if ($score>0) { |
|---|
| 85 |
my $debuginfo = ""; |
|---|
| 86 |
if ( $conf->{"focr_verbose"} > 0 ) { |
|---|
| 87 |
chomp($err); |
|---|
| 88 |
$debuginfo = ("Corrupt image: $err"); |
|---|
| 89 |
} |
|---|
| 90 |
infolog($debuginfo); |
|---|
| 91 |
my $ws = sprintf( "%0.3f", $score ); |
|---|
| 92 |
for my $set ( 0 .. 3 ) { |
|---|
| 93 |
$pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_CORRUPT_IMG"} = $ws; |
|---|
| 94 |
} |
|---|
| 95 |
my @dinfo = split('\n', $debuginfo); |
|---|
| 96 |
foreach (@dinfo) { |
|---|
| 97 |
$pms->test_log($_); |
|---|
| 98 |
} |
|---|
| 99 |
$pms->_handle_hit( "FUZZY_OCR_CORRUPT_IMG", $score, "BODY: ", "BODY", |
|---|
| 100 |
$pms->{conf}->get_description_for_rule("FUZZY_OCR_CORRUPT_IMG")); |
|---|
| 101 |
} |
|---|
| 102 |
} |
|---|
| 103 |
|
|---|
| 104 |
sub known_img_hash { |
|---|
| 105 |
my $conf = get_config(); |
|---|
| 106 |
my $pms = get_pms(); |
|---|
| 107 |
my $score = $_[0] || $conf->{'focr_base_score'}; |
|---|
| 108 |
my $debuginfo = $_[1] ? "\n$_[1]" : ''; |
|---|
| 109 |
my $ws = sprintf( "%0.3f", $score ); |
|---|
| 110 |
for my $set ( 0 .. 3 ) { |
|---|
| 111 |
$pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_KNOWN_HASH"} = $ws; |
|---|
| 112 |
} |
|---|
| 113 |
my @dinfo = split('\n', $debuginfo); |
|---|
| 114 |
foreach (@dinfo) { |
|---|
| 115 |
$pms->test_log($_); |
|---|
| 116 |
} |
|---|
| 117 |
$pms->_handle_hit( "FUZZY_OCR_KNOWN_HASH", $score, "BODY: ", "BODY", |
|---|
| 118 |
$pms->{conf}->get_description_for_rule("FUZZY_OCR_KNOWN_HASH")); |
|---|
| 119 |
} |
|---|
| 120 |
|
|---|
| 121 |
1; |
|---|