root/trunk/devel/FuzzyOcr/Scoring.pm

Revision 133, 4.1 kB (checked in by decoder, 1 year ago)

Added License tags too all code files

Line 
1 # <@LICENSE>
2 # Licensed to the Apache Software Foundation (ASF) under one or more
3 # contributor license agreements.  See the NOTICE file distributed with
4 # this work for additional information regarding copyright ownership.
5 # The ASF licenses this file to you under the Apache License, Version 2.0
6 # (the "License"); you may not use this file except in compliance with
7 # the License.  You may obtain a copy of the License at:
8 #
9 #     http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 # </@LICENSE>
17
18 use strict;
19 package FuzzyOcr::Scoring;
20
21 use base 'Exporter';
22 our @EXPORT_OK = qw(wrong_ctype corrupt_img known_img_hash wrong_extension);
23
24 use lib qw(..);
25 use FuzzyOcr::Config qw(get_pms get_config);
26 use FuzzyOcr::Logging qw(infolog);
27
28 # Provide custom scoring functions
29
30 sub wrong_ctype {
31     my $conf = get_config();
32     my $pms = get_pms();
33     my ( $format, $ctype ) = @_;
34     if ($conf->{'focr_wrongctype_score'}) {
35         my $debuginfo = "";
36         if ( $conf->{"focr_verbose"} > 0 ) {
37             $debuginfo =
38               ("Image has format \"$format\" but content-type is \"$ctype\"");
39         }
40         infolog($debuginfo);
41         my $ws = sprintf( "%0.3f", $conf->{'focr_wrongctype_score'} );
42         for my $set ( 0 .. 3 ) {
43             $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_WRONG_CTYPE"} = $ws;
44         }
45         my @dinfo = split('\n', $debuginfo);
46         foreach (@dinfo) {
47             $pms->test_log($_);
48         }
49         $pms->_handle_hit( "FUZZY_OCR_WRONG_CTYPE",
50             $conf->{'focr_wrongctype_score'}, "BODY: ", "BODY",
51             $pms->{conf}->get_description_for_rule("FUZZY_OCR_WRONG_CTYPE"));
52     }
53 }
54
55 sub wrong_extension {
56     my $conf = get_config();
57     my $pms = get_pms();
58     my ( $format, $suffix ) = @_;
59     if ($conf->{'focr_wrongext_score'}) {
60         my $debuginfo = "";
61         if ( $conf->{"focr_verbose"} > 0 ) {
62             $debuginfo =
63               ("Image has format \"$format\" but file extension is \"$suffix\"");
64         }
65         infolog($debuginfo);
66         my $ws = sprintf( "%0.3f", $conf->{'focr_wrongext_score'} );
67         for my $set ( 0 .. 3 ) {
68             $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_WRONG_EXTENSION"} = $ws;
69         }
70         my @dinfo = split('\n', $debuginfo);
71         foreach (@dinfo) {
72             $pms->test_log($_);
73         }
74         $pms->_handle_hit( "FUZZY_OCR_WRONG_EXTENSION",
75             $conf->{'focr_wrongext_score'}, "BODY: ", "BODY",
76             $pms->{conf}->get_description_for_rule("FUZZY_OCR_WRONG_EXTENSION"));
77     }
78 }
79
80 sub corrupt_img {
81     my $conf = get_config();
82     my $pms = get_pms();
83     my ($score, $err) = @_;
84     if ($score>0) {
85         my $debuginfo = "";
86         if ( $conf->{"focr_verbose"} > 0 ) {
87             chomp($err);
88             $debuginfo = ("Corrupt image: $err");
89         }
90         infolog($debuginfo);
91         my $ws = sprintf( "%0.3f", $score );
92         for my $set ( 0 .. 3 ) {
93             $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_CORRUPT_IMG"} = $ws;
94         }
95         my @dinfo = split('\n', $debuginfo);
96         foreach (@dinfo) {
97                 $pms->test_log($_);
98         }
99         $pms->_handle_hit( "FUZZY_OCR_CORRUPT_IMG", $score, "BODY: ", "BODY",
100             $pms->{conf}->get_description_for_rule("FUZZY_OCR_CORRUPT_IMG"));
101     }
102 }
103
104 sub known_img_hash {
105     my $conf = get_config();
106     my $pms = get_pms();
107     my $score = $_[0] || $conf->{'focr_base_score'};
108     my $debuginfo = $_[1] ? "\n$_[1]" : '';
109     my $ws = sprintf( "%0.3f", $score );
110     for my $set ( 0 .. 3 ) {
111         $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_KNOWN_HASH"} = $ws;
112     }
113     my @dinfo = split('\n', $debuginfo);
114     foreach (@dinfo) {
115         $pms->test_log($_);
116     }
117     $pms->_handle_hit( "FUZZY_OCR_KNOWN_HASH", $score, "BODY: ", "BODY",
118         $pms->{conf}->get_description_for_rule("FUZZY_OCR_KNOWN_HASH"));
119 }
120
121 1;
Note: See TracBrowser for help on using the browser.