Changeset 65

Show
Ignore:
Timestamp:
28.11.2006 21:01:04 (2 years ago)
Author:
decoder
Message:

New function focr_autosort_scanset: Enables the plugin to automatically adapt the order of the scansets to the current mail traffic.
Currently effective scansets are sorted to the beginning of the scanset list, to save scanner passes.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/devel/FuzzyOcr.cf

    r63 r65  
    6868## This saves resources, but lowers the scores because the first scanset that generates enough hits, rather than the best one, is taken as the result. 
    6969#focr_minimal_scanset 0.0 
     70# 
     71## This option is only used when focr_minimal_scanset is enabled. Basically, this counts the effectiveness 
     72## of a scanset on the current mail traffic and re-sorts the scansets with the most effective first. 
     73## This saves unnecessary scanner passes and saves resources. The default is 1. 
     74#focr_autosort_scanset 1 
     75# 
     76## This is a parameter for the focr_autosort_scanset function, and specifies the maximum value of the effectiveness 
     77## counter used in each scanset. If you increase this, it will take longer until the autosort function adapts to new 
     78## types of spam; setting it too low will lower the effectiveness of the function. Default is 10. 
     79#focr_autosort_buffer 10 
    7080# 
    7181######### 
  • trunk/devel/FuzzyOcr.pm

    r64 r65  
    658658            if ($mcnt >= $conf->{focr_counts_required} and $conf->{focr_minimal_scanset}) { 
    659659                debuglog("Scanset \"$scanlabel\" generates enough hits ($mcnt), skipping further scansets..."); 
     660                if ($conf->{focr_autosort_scanset}) { 
     661                    foreach my $s (@$scansets) { 
     662                        if ($s->{label} eq $scanlabel) { 
     663                            if ($s->{hit_counter} < $conf->{focr_autosort_buffer}) { 
     664                                $s->{hit_counter} = $s->{hit_counter} + 1; 
     665                            } 
     666                        } else { 
     667                            if ($s->{hit_counter} > 0) { 
     668                                $s->{hit_counter} = $s->{hit_counter} - 1; 
     669                            } 
     670                        } 
     671                    } 
     672                    debuglog("Resorting scanset list..."); 
     673                    @$scansets = sort { $b->{hit_counter} <=> $a->{hit_counter} } @$scansets; 
     674                } 
    660675                last; 
    661676            } 
    662             #push( @ocr_results, [@result] ); 
    663             #push( @used_scansets, $scanset ); 
    664677        } 
    665678        if ($conf->{focr_enable_image_hashing}) { 
  • trunk/devel/FuzzyOcr/Config.pm

    r63 r65  
    381381            setting => 'focr_minimal_scanset', 
    382382            default => 0, 
     383            type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL 
     384        }); 
     385    push (@cmds, { 
     386            setting => 'focr_autosort_scanset', 
     387            default => 1, 
     388            type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL 
     389        }); 
     390    push (@cmds, { 
     391            setting => 'focr_autosort_buffer', 
     392            default => 10, 
    383393            type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL 
    384394        }); 
  • trunk/devel/FuzzyOcr/Scanset.pm

    r62 r65  
    1111        "preprocessors" => $preprocessors, 
    1212        "ocr_command"   => $ocr_command, 
    13         "force_output_in" => $output_in 
     13        "force_output_in" => $output_in, 
     14        "hit_counter"   => 0 
    1415    }, $class; 
    1516}