root/tags/FuzzyOcr-3.4.2/FuzzyOcr.cf

Revision 32, 6.2 kB (checked in by decoder, 2 years ago)

Added animated gif sample to sample files (to test gifsicle)
Added missing 3.4.1 files which were not committed before
New version 3.4.2:

  • Fixes configuration facility problems that conflict with other Plugins (Thanks to John Rudd)
  • Added new sample file as mentioned above
  • Fixed fuzzy-find.pl
    • now uses FuzzyOcr.cf for bin paths
    • removed ImageMagick dependency, replaced with pamfile (netpbm)
    • Added usage explanation which is displayed when called without any args
  • Fixed typos in INSTALL document
Line 
1 loadplugin FuzzyOcr FuzzyOcr.pm
2 body FUZZY_OCR eval:fuzzyocr_check()
3 describe FUZZY_OCR Mail contains an image with common spam text inside
4 body FUZZY_OCR_WRONG_CTYPE eval:dummy_check()
5 describe FUZZY_OCR_WRONG_CTYPE Mail contains an image with wrong content-type set
6 body FUZZY_OCR_CORRUPT_IMG eval:dummy_check()
7 describe FUZZY_OCR_CORRUPT_IMG Mail contains a corrupted image
8 body FUZZY_OCR_KNOWN_HASH eval:dummy_check()
9 describe FUZZY_OCR_KNOWN_HASH Mail contains an image with known hash
10
11 priority FUZZY_OCR             900
12
13 ########### Plugin Configuration #############
14
15 #### Logging options #####
16 # Verbosity level (see manual) Attention: Don't set to 0, but to 0.0 for quiet operation, or comment out the focr_logfile line. (Default value: 1)
17 #focr_verbose 1
18 #
19 # Logfile (make sure it is writable by the plugin) (Default value: NONE)
20 #focr_logfile /etc/mail/spamassassin/FuzzyOcr.log
21 ##########################
22
23 ##### Wordlists #####
24 # Here we define the words to scan for (Default value: /etc/mail/spamassassin/FuzzyOcr.words)
25 #focr_global_wordlist /etc/mail/spamassassin/FuzzyOcr.words
26 #
27 # This is the path RELATIVE to the respective home directory for the personalized list
28 # This list is merged with the global word list on execution (Default value: .spamassassin/fuzzyocr.words)
29 # If focr_personal_wordlist begins with '/', the option is treated as a fixed path and HOME is not searched
30 #focr_personal_wordlist .spamassassin/fuzzyocr.words
31 #####################
32
33 # These parameters can be used to change other detection settings
34 # If you leave these commented out, the defaults will be used.
35 # Do not use " " around any parameters!
36 #
37 ##### Location of helper applications (path + binary) (Default values: /usr/bin/<app>) #####
38 #focr_bin_gifsicle /usr/bin/gifsicle
39 #focr_bin_giffix /usr/bin/giffix
40 #focr_bin_giftext /usr/bin/giftext
41 #focr_bin_gifinter /usr/bin/gifinter
42 #focr_bin_giftopnm /usr/bin/giftopnm
43 #focr_bin_jpegtopnm /usr/bin/jpegtopnm
44 #focr_bin_pngtopnm /usr/bin/pngtopnm
45 #focr_bin_bmptopnm /usr/bin/bmptopnm
46 #focr_bin_tifftopnm /usr/bin/tifftopnm
47 #focr_bin_ppmhist /usr/bin/ppmhist
48 #focr_bin_gocr /usr/bin/gocr
49 #focr_bin_ocrad /usr/bin/ocrad
50 #
51 #focr_path_bin /usr/local/netpbm/bin:/usr/local/bin:/usr/bin
52 #
53 ############################################################################################
54
55 ##### Scansets, comma separated (Default value: $gocr -i -, $gocr -l 180 -d 2 -i -) #####
56 # Each scanset consists of one or more commands which make text out of pnm input.
57 # Each scanset is run separately on the PNM data, results are combined in scoring.
58 #focr_scansets $gocr -i $pfile, $gocr -l 180 -d 2 -i $pfile
59 #
60 # An example that involves ocrad as well
61 #focr_scansets $gocr -i $pfile, $gocr -l 180 -d 2 -i $pfile, $ocrad -s 0.5 -T 0.5 $pfile
62 #
63 # Another one for ocrad only
64 #focr_scansets $ocrad -s 0.5 -T 0.5 $pfile
65 #
66 # To use only one scan with default values, uncomment the next line instead
67 #focr_scansets $gocr -i $pfile
68 #
69 # Some example for more advanced sets
70 # This one first uses the standard scan, then a scanset which first reduces the image to 3 colors and then scans it with custom settings
71 # and then it scans again only with these custom settings
72 # NOTE: This is for advanced users only. If you have questions about how to use this, ask on the ML or on IRC; most times you won't need this at all
73 #focr_scansets $gocr -i $pfile, pnmnorm $pfile 2>$efile | pnmquant 3 2>>$efile | pnmnorm 2>>$efile | $gocr -l 180 -d 2 -i -, $gocr -l 180 -d 2 -i $pfile
74 #########################################################################################
75
76 ##### Various Score/Scan settings #####
77 # Timeout for the plugin, in seconds. (Maximum runtime of the plugin) (Default value: 10)
78 #focr_timeout 15
79 #
80 # Default detection threshold (see manual) (Default value: 0.3) (Can be changed on a per word basis in the wordlist).
81 #focr_threshold 0.3
82 #
83 # This is the score for a hit after focr_counts_required matches
84 #focr_base_score 5
85 #
86 # This is the additional score for every additional match after focr_counts_required matches (Default value: 1)
87 #focr_add_score 0.375
88 #
89 # This is the score to give for a wrong content-type (e.g. JPEG image but content type says GIF) (Default value: 1.5)
90 #focr_wrongctype_score 1.5
91 #
92 # This is the score to give for a corrupted image (This currently affects only GIF images) (Default value: 2.5)
93 #focr_corrupt_score 2.5
94 #
95 # This is the score to give for a corrupted unfixable image (This currently affects only GIF images) (Default value: 5)
96 #focr_corrupt_unfixable_score 5
97 #
98 # This is used to disable the OCR engine if the message has already more points than this value (Default value: 10)
99 #focr_autodisable_score 10
100 #
101 # Number of minimum matches before the rule scores (Default value: 2)
102 #focr_counts_required 3
103 #
104 #######################################
105
106 ##### Image Hash Database settings (Recommended, but disabled by default) #####
107 #
108 # Set this to 2 to enable the Image Hash database feature (Default value: 0.0)
109 # Value = 1 ... use digest_hash only
110 # Value = 2 ... use digest_db w/digest_hash import (Recommended)
111 #focr_enable_image_hashing 0.0
112 #
113 # The score is saved with the hash in the database, so no extra scoring for a db hit is required.
114 #
115 # If the image hash database feature is enabled, specify the file here to use as database
116 # (Default value: /etc/mail/spamassassin/FuzzyOcr.hashdb)
117 #focr_digest_db /etc/mail/spamassassin/FuzzyOcr.hashdb
118 #
119 # If the image hash db feature is enabled, specify the file here to use as database
120 # (Default value: /etc/mail/spamassassin/FuzzyOcr.db)
121 #focr_db_hash /etc/mail/spamassassin/FuzzyOcr.db
122 #
123 # If the image hash db feature is enabled, specify the file here to use as database
124 # (Default value: /etc/mail/spamassassin/FuzzyOcr.safe.db)
125 #focr_db_safe /etc/mail/spamassassin/FuzzyOcr.safe.db
126 #
127 # Expire records from focr_digest_db after this many days (Default: 35)
128 #focr_db_max_days 15
129 #
130 # Automatically add hashes of spam images recognized by OCR to the Image Hash database, to disable, set to 0.0 (Default value: 1)
131 #focr_hashing_learn_scanned 1
132 #
133 # Keep files that generate errors (Default: 0.0)
134 #  0 = always cleanup
135 #  1 = keep only if error
136 #  2 = always keep
137 #focr_keep_bad_images 0.0
138 #
139 # Score images whose global word count is below focr_counts_required using focr_add_score * word count as score.
140 #focr_score_ham 0.0
141 ######################################################################
Note: See TracBrowser for help on using the browser.