| 1 | # <@LICENSE> |
|---|
| 2 | # Licensed to the Apache Software Foundation (ASF) under one or more |
|---|
| 3 | # contributor license agreements. See the NOTICE file distributed with |
|---|
| 4 | # this work for additional information regarding copyright ownership. |
|---|
| 5 | # The ASF licenses this file to you under the Apache License, Version 2.0 |
|---|
| 6 | # (the "License"); you may not use this file except in compliance with |
|---|
| 7 | # the License. You may obtain a copy of the License at: |
|---|
| 8 | # |
|---|
| 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
|---|
| 10 | # |
|---|
| 11 | # Unless required by applicable law or agreed to in writing, software |
|---|
| 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
|---|
| 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|---|
| 14 | # See the License for the specific language governing permissions and |
|---|
| 15 | # limitations under the License. |
|---|
| 16 | # </@LICENSE> |
|---|
| 17 | |
|---|
| 18 | use strict; |
|---|
| 19 | package FuzzyOcr::Scoring; |
|---|
| 20 | |
|---|
| 21 | use base 'Exporter'; |
|---|
| 22 | our @EXPORT_OK = qw(wrong_ctype corrupt_img known_img_hash wrong_extension); |
|---|
| 23 | |
|---|
| 24 | use lib qw(..); |
|---|
| 25 | use FuzzyOcr::Config qw(get_pms get_config); |
|---|
| 26 | use FuzzyOcr::Logging qw(infolog); |
|---|
| 27 | |
|---|
| 28 | # Provide custom scoring functions |
|---|
| 29 | |
|---|
| 30 | sub wrong_ctype { |
|---|
| 31 | my $conf = get_config(); |
|---|
| 32 | my $pms = get_pms(); |
|---|
| 33 | my ( $format, $ctype ) = @_; |
|---|
| 34 | if ($conf->{'focr_wrongctype_score'}) { |
|---|
| 35 | my $debuginfo = ""; |
|---|
| 36 | if ( $conf->{"focr_verbose"} > 0 ) { |
|---|
| 37 | $debuginfo = |
|---|
| 38 | ("Image has format \"$format\" but content-type is \"$ctype\""); |
|---|
| 39 | } |
|---|
| 40 | infolog($debuginfo); |
|---|
| 41 | my $ws = sprintf( "%0.3f", $conf->{'focr_wrongctype_score'} ); |
|---|
| 42 | for my $set ( 0 .. 3 ) { |
|---|
| 43 | $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_WRONG_CTYPE"} = $ws; |
|---|
| 44 | } |
|---|
| 45 | my @dinfo = split('\n', $debuginfo); |
|---|
| 46 | foreach (@dinfo) { |
|---|
| 47 | $pms->test_log($_); |
|---|
| 48 | } |
|---|
| 49 | $pms->_handle_hit( "FUZZY_OCR_WRONG_CTYPE", |
|---|
| 50 | $conf->{'focr_wrongctype_score'}, "BODY: ", "BODY", |
|---|
| 51 | $pms->{conf}->get_description_for_rule("FUZZY_OCR_WRONG_CTYPE")); |
|---|
| 52 | } |
|---|
| 53 | } |
|---|
| 54 | |
|---|
| 55 | sub wrong_extension { |
|---|
| 56 | my $conf = get_config(); |
|---|
| 57 | my $pms = get_pms(); |
|---|
| 58 | my ( $format, $suffix ) = @_; |
|---|
| 59 | if ($conf->{'focr_wrongext_score'}) { |
|---|
| 60 | my $debuginfo = ""; |
|---|
| 61 | if ( $conf->{"focr_verbose"} > 0 ) { |
|---|
| 62 | $debuginfo = |
|---|
| 63 | ("Image has format \"$format\" but file extension is \"$suffix\""); |
|---|
| 64 | } |
|---|
| 65 | infolog($debuginfo); |
|---|
| 66 | my $ws = sprintf( "%0.3f", $conf->{'focr_wrongext_score'} ); |
|---|
| 67 | for my $set ( 0 .. 3 ) { |
|---|
| 68 | $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_WRONG_EXTENSION"} = $ws; |
|---|
| 69 | } |
|---|
| 70 | my @dinfo = split('\n', $debuginfo); |
|---|
| 71 | foreach (@dinfo) { |
|---|
| 72 | $pms->test_log($_); |
|---|
| 73 | } |
|---|
| 74 | $pms->_handle_hit( "FUZZY_OCR_WRONG_EXTENSION", |
|---|
| 75 | $conf->{'focr_wrongext_score'}, "BODY: ", "BODY", |
|---|
| 76 | $pms->{conf}->get_description_for_rule("FUZZY_OCR_WRONG_EXTENSION")); |
|---|
| 77 | } |
|---|
| 78 | } |
|---|
| 79 | |
|---|
| 80 | sub corrupt_img { |
|---|
| 81 | my $conf = get_config(); |
|---|
| 82 | my $pms = get_pms(); |
|---|
| 83 | my ($score, $err) = @_; |
|---|
| 84 | if ($score>0) { |
|---|
| 85 | my $debuginfo = ""; |
|---|
| 86 | if ( $conf->{"focr_verbose"} > 0 ) { |
|---|
| 87 | chomp($err); |
|---|
| 88 | $debuginfo = ("Corrupt image: $err"); |
|---|
| 89 | } |
|---|
| 90 | infolog($debuginfo); |
|---|
| 91 | my $ws = sprintf( "%0.3f", $score ); |
|---|
| 92 | for my $set ( 0 .. 3 ) { |
|---|
| 93 | $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_CORRUPT_IMG"} = $ws; |
|---|
| 94 | } |
|---|
| 95 | my @dinfo = split('\n', $debuginfo); |
|---|
| 96 | foreach (@dinfo) { |
|---|
| 97 | $pms->test_log($_); |
|---|
| 98 | } |
|---|
| 99 | $pms->_handle_hit( "FUZZY_OCR_CORRUPT_IMG", $score, "BODY: ", "BODY", |
|---|
| 100 | $pms->{conf}->get_description_for_rule("FUZZY_OCR_CORRUPT_IMG")); |
|---|
| 101 | } |
|---|
| 102 | } |
|---|
| 103 | |
|---|
| 104 | sub known_img_hash { |
|---|
| 105 | my $conf = get_config(); |
|---|
| 106 | my $pms = get_pms(); |
|---|
| 107 | my $score = $_[0] || $conf->{'focr_base_score'}; |
|---|
| 108 | my $debuginfo = $_[1] ? "\n$_[1]" : ''; |
|---|
| 109 | my $ws = sprintf( "%0.3f", $score ); |
|---|
| 110 | for my $set ( 0 .. 3 ) { |
|---|
| 111 | $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_KNOWN_HASH"} = $ws; |
|---|
| 112 | } |
|---|
| 113 | my @dinfo = split('\n', $debuginfo); |
|---|
| 114 | foreach (@dinfo) { |
|---|
| 115 | $pms->test_log($_); |
|---|
| 116 | } |
|---|
| 117 | $pms->_handle_hit( "FUZZY_OCR_KNOWN_HASH", $score, "BODY: ", "BODY", |
|---|
| 118 | $pms->{conf}->get_description_for_rule("FUZZY_OCR_KNOWN_HASH")); |
|---|
| 119 | } |
|---|
| 120 | |
|---|
| 121 | 1; |
|---|