Changeset 100

Show
Ignore:
Timestamp:
10.12.2006 01:29:31 (2 years ago)
Author:
decoder
Message:

Do not use, contains a bug!

Adjusted some defaults in config file / Config.pm
Made final changes to FuzzyOcr.scansets and FuzzyOcr.preps

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/devel/FuzzyOcr.cf

    r93 r100  
    134134# This saves resources, but lowers the scores because not the best,  
    135135# but the first best scanset is taken as result. 
    136 # Default value: 0 
    137 #focr_minimal_scanset 1 
     136# Default value: 1 
     137#focr_minimal_scanset 0 
    138138 
    139139# This option is only used when focr_minimal_scanset is enabled.  
     
    246246# Allowed values: 
    247247#  1 ... use digest_hash only (deprecated) 
    248 #  2 ... use digest_db w/digest_hash import (see requirements)
    249 #  3 ... use mysql database (see requirements)
     248#  2 ... use digest_db w/digest_hash import (see requirements, recommended)
     249#  3 ... use mysql database (see requirements, experimental)
    250250#-- 
    251251# The score is saved with the hash in the database, allowing the plugin to 
     
    260260 
    261261# Automatically add hashes of spam images recognized by OCR to the Image  
    262 # Hash database, to disable, set to 0.0  
     262# Hash database, to disable, set to 0 
    263263# Default value: 1 (learn) 
    264264#focr_hashing_learn_scanned 1 
     
    289289 
    290290### 
    291 ### MySQL options 
     291### MySQL options (Type 3 Hashing) 
    292292### 
    293293 
  • trunk/devel/FuzzyOcr.preps

    r70 r100  
    22# Do not modify this on your own unless you have read the manual and know what you're doing 
    33 
     4# Normalizes a PNM 
    45preprocessor normalize { 
    56    command = pnmnorm 
    67} 
    78 
     9# Inverts a PNM 
    810preprocessor invert { 
    911    command = pnminvert 
    1012} 
    1113 
    12 # requires ImageMagic convert 
     14# Converts PPM (Color PNM) to PGM (Greyscale PNM) 
     15preprocessor ppmtopgm { 
     16    command = ppmtopgm 
     17}
     18 
     19# Converts PAM to PNM 
     20preprocessor pamtopnm { 
     21    command = pamtopnm 
     22}
     23 
     24# Uses thresholding on the PAM file 
     25preprocessor pamthreshold { 
     26    command = pamthreshold 
     27    args = -simple -threshold 0.5 
     28}
     29 
     30# converts PNM to TIFF (this is used for tesseract) 
    1331preprocessor maketiff { 
    14     command = convert 
    15     args = $input tiff:$output 
     32    command = pnmtotiff 
     33    args = -color -truecolor 
    1634} 
  • trunk/devel/FuzzyOcr.scansets

    r70 r100  
    22# Do not modify this on your own unless you have read the manual and know what you're doing 
    33 
     4# Note: If the focr_minimal_scanset option is enabled (default), 
     5# not all of these scansets run for every picture. But be aware 
     6# that for ham images, all of them will always run. 
     7 
     8 
     9# Standard Ocrad Scanset 
    410scanset ocrad { 
    511    command = $ocrad 
     
    713} 
    814 
     15# Inverted Ocrad scanset 
    916scanset ocrad-invert { 
    1017    command = $ocrad 
     
    1219} 
    1320 
     21# Inverted Ocrad scanset 
     22scanset ocrad-invert { 
     23    preprocessors = ppmtopgm, pamthreshold, pamtopnm 
     24    command = $ocrad 
     25    args = -s5 -i $input 
     26} 
     27 
     28# Standard Gocr Scanset 
    1429scanset gocr { 
    1530    command = $gocr 
     
    1732} 
    1833 
     34# Tweaked Gocr Scanset 
    1935scanset gocr-180 { 
    2036    command = $gocr 
     
    2238} 
    2339 
    24 scanset gocr-invert { 
    25     preprocessors = normalize, invert, normalize 
    26     command = $gocr 
    27     args = -i $input 
    28 }
     40# An example Scanset how to use tesseract (disabled by default) 
     41#scanset tesseract { 
     42#    preprocessors = maketiff 
     43#    command = $tesseract 
     44#    args = $input $output  
     45#    force_output_in = $output.txt 
     46#} 
    2947 
    30 scanset tesseract { 
    31     preprocessors = maketiff 
    32     command = $tesseract 
    33     args = $input $output batch  
    34     force_output_in = $output.txt 
    35 }
     48# Another example, this basically does the same as the inverted Ocrad Scanset 
     49# Only listed here to serve as example, ocrad-invert does this already 
     50
     51#scanset gocr-invert { 
     52#    preprocessors = normalize, invert, normalize 
     53#    command = $gocr 
     54#    args = -i $input 
     55#} 
  • trunk/devel/FuzzyOcr/Config.pm

    r99 r100  
    432432    push (@cmds, { 
    433433        setting => 'focr_minimal_scanset', 
    434         default => 0,
     434        default => 1,
    435435        type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL 
    436436    });