Changeset 22

Show
Ignore:
Timestamp:
01.11.2006 15:54:06 (2 years ago)
Author:
jorge
Message:

Auto-adjust word thresholds based on word length, so that shorter words require a closer match; this applies to both the global and the personal word lists.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/devel/FuzzyOcr/Config.pm

    r17 r22  
    33 
    44use base 'Exporter'; 
    5 our @EXPORT_OK = qw(get_pms save_pms get_scansets get_thresholds get_config get_wordlist set_config finish_parsing_end load_global_words load_personal_words debuglog logfile); 
     5our @EXPORT_OK = qw(get_pms 
     6    save_pms  
     7    get_scansets  
     8    get_thresholds  
     9    get_config  
     10    get_wordlist  
     11    set_config  
     12    finish_parsing_end  
     13    load_global_words  
     14    load_personal_words  
     15    debuglog  
     16    logfile); 
    617 
    718use Fcntl ':flock'; 
     
    321332            ($w, $wt) = (lc($1), $2); 
    322333            $wt = $conf->{focr_threshold} unless ($wt =~ m/[\d\.]+/); 
     334        } else { 
     335            $wt *= 0.750 if length($w) == 5; 
     336            $wt *= 0.500 if length($w) == 4; 
     337            $wt *= 0.250 if length($w)  < 4; 
    323338        } 
    324339        $words{$w} = $wt; $cnt++; 
     
    349364            ($w, $wt) = ($1, $2); 
    350365            $wt = $conf->{focr_threshold} unless ($wt =~ m/[\d\.]+/); 
     366        } else { 
     367            $wt *= 0.750 if length($w) == 5; 
     368            $wt *= 0.500 if length($w) == 4; 
     369            $wt *= 0.250 if length($w)  < 4; 
    351370        } 
    352371        $words{$w} = $wt; $cnt++;