root/tags/FuzzyOcr-3.5.0-rc1/FuzzyOcr.cf

Revision 105, 10.0 kB (checked in by decoder, 2 years ago)

Added FuzzyOcr 3.5.0-rc1 tag

Line 
1 # Syntax:
2 # loadplugin <Plugin_Name> <Location>
3 #  <Location> path where Plugin resides.
4 loadplugin FuzzyOcr FuzzyOcr.pm
5
6 body     FUZZY_OCR                   eval:fuzzyocr_check()
7 body     FUZZY_OCR_WRONG_CTYPE       eval:dummy_check()
8 body     FUZZY_OCR_CORRUPT_IMG       eval:dummy_check()
9 body     FUZZY_OCR_WRONG_EXTENSION   eval:dummy_check()
10 body     FUZZY_OCR_KNOWN_HASH        eval:dummy_check()
11
12 describe FUZZY_OCR                   Mail contains an image with common spam text inside
13 describe FUZZY_OCR_WRONG_CTYPE       Mail contains an image with wrong content-type set
14 describe FUZZY_OCR_WRONG_EXTENSION   Mail contains an image with wrong file extension
15 describe FUZZY_OCR_CORRUPT_IMG       Mail contains a corrupted image
16 describe FUZZY_OCR_KNOWN_HASH        Mail contains an image with known hash
17
18 priority FUZZY_OCR 900
19
20 ###
21 ### Plugin Configuration
22 ###
23
24 ###
25 ### Logging options
26 ###
27
28 # Verbosity level (see manual)
29 # Level 0 - Errors only
30 # Level 1 - Errors and Warnings
31 # Level 2 - Errors, Warnings and Info Messages
32 # Level 3 - Full debug output
33 # Default value: 1
34 #focr_verbose 3
35
36 # Send logging output to stderr.
37 # Default value: 1
38 #focr_log_stderr 0
39
40 # Logfile (make sure it is writable by the plugin)
41 # Default value: none
42 #focr_logfile /tmp/FuzzyOcr.log
43
44 ###
45 ### Wordlists
46 ###
47
48 # Here we define the words to scan for
49 # Default value: /etc/mail/spamassassin/FuzzyOcr.words
50 #focr_global_wordlist /etc/mail/spamassassin/FuzzyOcr.words
51 #
52 # This is the path RELATIVE to the respective home directory
53 # for the personalized list. This list is merged with the global
54 # word list on execution.
55 # Default value: ~/.spamassassin/fuzzyocr.words
56 # If value begins with '/', it is treated as fixed path.
57 #focr_personal_wordlist fuzzyocr.words
58 #
59 ## Optionally, disable this option if you want to scan for numbers
60 ## Setting this to 0 will cause FuzzyOcr not to strip numbers from
61 ## both the wordlist and the OCR results
62 #
63 #focr_strip_numbers 1
64
65
66 ###
67 ### Helper Applications
68 ###
69
70 # These parameters can be used to change other detection settings
71 # If you leave these commented out, the defaults will be used.
72 # Do not use " " around any parameters!
73
74 ###
75 ### Step 1:
76 ### Inform the plugin which helper apps are required.
77 ###
78
79 # The following are already included by default:
80 #
81 #focr_bin_helper gifsicle, giffix, giftext, gifinter, giftopnm
82 #focr_bin_helper jpegtopnm, pngtopnm, bmptopnm, tifftopnm, ppmhist
83 #focr_bin_helper gocr, ocrad
84
85 # Include additional scanner/preprocessor commands here:
86 #
87 focr_bin_helper pnmnorm, pnminvert, pamthreshold, ppmtopgm, pamtopnm
88 focr_bin_helper tesseract
89
90 ###
91 ### Step 2:
92 ### Inform the plugin of the search path to find all helper apps.
93 ### Only the first match will be considered, so the order is important.
94 ###
95
96 # Search path for locating helper applications
97 #focr_path_bin /usr/local/netpbm/bin:/usr/local/bin:/usr/bin
98
99 ###
100 ### Step 3:
101 ### You can optionally define a helper application location, bypassing
102 ### the search path algorithm. Please note that if the helper app is not
103 ### previously defined, it will generate an error:
104
105 #focr_bin_gifsicle /usr/bin/gifsicle
106 #focr_bin_giffix /usr/bin/giffix
107 #focr_bin_giftext /usr/bin/giftext
108 #focr_bin_gifinter /usr/bin/gifinter
109 #focr_bin_giftopnm /usr/bin/giftopnm
110 #focr_bin_jpegtopnm /usr/bin/jpegtopnm
111 #focr_bin_pngtopnm /usr/bin/pngtopnm
112 #focr_bin_bmptopnm /usr/bin/bmptopnm
113 #focr_bin_tifftopnm /usr/bin/tifftopnm
114 #focr_bin_ppmhist /usr/bin/ppmhist
115 #focr_bin_gocr /usr/bin/gocr
116 #focr_bin_ocrad /usr/bin/ocrad
117
118 #focr_bin_pnmnorm /usr/bin/pnmnorm
119 #focr_bin_pnminvert /usr/bin/pnminvert
120 #focr_bin_convert /usr/bin/convert
121
122 ###
123 ### Scansets
124 ###
125
126 # Paths to the files containing Scansets and Preprocessors definitions
127 #
128 #focr_preprocessor_file /etc/mail/spamassassin/FuzzyOcr.preps
129 #focr_scanset_file /etc/mail/spamassassin/FuzzyOcr.scansets
130
131 # Setting this to 1 will cause FuzzyOcr to skip all other scansets,
132 # if a scanset has reached the amount of hits specified in
133 # focr_counts_required. (i.e. if the image is detected as spam).
134 # This saves resources, but lowers the scores because the first scanset
135 # to reach that amount of hits, not the best one, is taken as the result.
136 # Default value: 1
137 #focr_minimal_scanset 0
138
139 # This option is only used when focr_minimal_scanset is enabled.
140 # Basically, this counts the effectiveness of a scanset on the current
141 # mail traffic and re-sorts the scansets with the most effective first.
142 # This saves unnecessary scanner passes and saves resources.
143 # Default value: 1.
144 #focr_autosort_scanset 0
145
146 # This is a parameter for the focr_autosort_scanset function, and specifies
147 # the maximum value of the effectiveness counter used in each scanset. If you
148 # increase this, it will take longer until the autosort function adapts to new
149 # types of spam, setting it too low will lower the effectiveness of the
150 # function.
151 # Default value: 10.
152 #focr_autosort_buffer 10
153
154 ###
155 ### Scan Settings
156 ###
157
158 # Timeout for the plugin, in seconds. (Maximum runtime of the plugin)
159 # Default value: 10
160 #focr_timeout 15
161
162 # Use a global timeout value instead of per helper application.
163 # Default value: 0
164 #focr_global_timeout 1
165
166 # Maximum file size for different formats in byte, bigger pictures
167 # will not be scanned
168 # Default values: Unlimited
169 #focr_max_size_gif 80000
170 #focr_max_size_jpeg 100000
171 #focr_max_size_png 80000
172 #focr_max_size_bmp 500000
173 #focr_max_size_tiff 500000
174
175 # Skip checking the following image types
176 # Default value: 0 (check image type)
177 #focr_skip_gif 1
178 #focr_skip_jpeg 1
179 #focr_skip_png 1
180 #focr_skip_bmp 1
181 #focr_skip_tiff 1
182
183 # Default detection threshold (see manual)
184 # Default value: 0.25 (Can be changed on a per word basis in the wordlist).
185 #focr_threshold 0.20
186
187 # Number of minimum matches before the rule scores (Default value: 2)
188 #focr_counts_required 3
189
190 # Setting this will cause every word to be matched only once per image (Default value: 0)
191 #focr_unique_matches 1
192
193 # This is the score for a hit after focr_counts_required matches
194 # Default value: 5
195 #focr_base_score 5
196
197 # This is the additional score for every additional match after
198 # focr_counts_required matches
199 # Default value: 1
200 #focr_add_score 0.375
201
202 # This option defines the factor, which is multiplied with the number
203 # of matches, that were made without stripping spaces. FuzzyOcr does two
204 # matching attempts on OCR results, one without space stripping and one with.
205 # To weight the first match type more, this factor is applied.
206 # Default value: 1.5
207 #focr_twopass_scoring_factor 1.5
208
209 # This is the score to give for a wrong content-type.
210 # e.g. JPEG image but content type says GIF
211 # Default value: 1.5
212 #focr_wrongctype_score 1.5
213
214 # This is the score to give for a wrong file extension.
215 # e.g. JPEG image but file extension says GIF
216 # Default value: 1.5
217 #focr_wrongext_score 1.5
218
219 # This is the score to give for a corrupted image.
220 # This currently affects only GIF images
221 # Default value: 2.5
222 #focr_corrupt_score 2.5
223
224 # This is the score to give for a corrupted unfixable image.
225 # This currently affects only GIF images.
226 # Default value: 5
227 #focr_corrupt_unfixable_score 5
228
229 # This is used to disable the OCR engine if the message has
230 # already more points than this value
231 # Default value: 10
232 #focr_autodisable_score 30
233
234 # This is used to disable the OCR engine if the message has
235 # already fewer points than this value
236 # Default value: -5
237 #focr_autodisable_negative_score -5
238
239
240 ###
241 ### Hashing Options (Optional)
242 ###
243
244 # Select which type of image hashing to use:
245 # Default value: 0 (disabled)
246 # Allowed values:
247 #  1 ... use digest_hash only (deprecated)
248 #  2 ... use digest_db w/digest_hash import (see requirements, recommended)
249 #  3 ... use mysql database (see requirements, experimental)
250 #--
251 # The score is saved with the hash in the database, allowing the plugin to
252 # skip the scans when the image is found in the database, using the score
253 # from the previous scans.
254 #--
255 #focr_enable_image_hashing 3
256
257 # Set this to skip updating the hashing database at startup
258 # Default value: 0 (update at startup)
259 #focr_skip_updates 1
260
261 # Automatically add hashes of spam images recognized by OCR to the Image
262 # Hash database, to disable, set to 0
263 # Default value: 1 (learn)
264 #focr_hashing_learn_scanned 1
265
266 # Score images whose global word count is below focr_counts_required using
267 # the following formula: (focr_add_score * word count) as score.
268 # Default value: 0 (ignore images)
269 #focr_score_ham 1
270
271 # If the image hash database feature is enabled (Type 1 Hashing),
272 # specify the file to use as database
273 # Default value: /etc/mail/spamassassin/FuzzyOcr.hashdb
274 #focr_digest_db /etc/mail/spamassassin/FuzzyOcr.hashdb
275
276 # If the image hash db feature is enabled (Type 2 Hashing),
277 # specify the file to use as the SPAM database
278 # Default value: /etc/mail/spamassassin/FuzzyOcr.db
279 #focr_db_hash /etc/mail/spamassassin/FuzzyOcr.db
280
281 # If the image hash db feature is enabled (Type 2 Hashing),
282 # specify the file to use as the HAM database
283 # Default value: /etc/mail/spamassassin/FuzzyOcr.safe.db
284 #focr_db_safe /etc/mail/spamassassin/FuzzyOcr.safe.db
285
286 # Auto-prune: Expire records from hashing databases after this many days
287 # Default value: 35
288 #focr_db_max_days 15
289
290 ###
291 ### MySQL options (Type 3 Hashing)
292 ###
293
294 #focr_mysql_db FuzzyOcr
295 #focr_mysql_hash Hash
296 #focr_mysql_safe Safe
297 #focr_mysql_user fuzzyocr
298 #focr_mysql_pass fuzzyocr
299 #focr_mysql_host localhost
300 #focr_mysql_port 3306
301 #focr_mysql_socket /tmp/mysql.sock
302
303 # If set, the database table is updated with different data from one of
304 # the following:
305 #  + filename,
306 #  + image-params,
307 #  + content-type,
308 #  + file-type,
309 #  + score,
310 #  + word-info
311 # Default value: 0
312 #focr_mysql_update_hash 1
313
314 ###
315 ### Miscellaneous Options
316 ###
317
318 # The plugin uses a temporary directory to store intermediate information.
319 # In order to keep these files for debugging purposes, use any of these
320 # values:
321 #  0 = always cleanup (default value)
322 #  1 = keep only if error
323 #  2 = always keep
324 #--
325 # Keeping these intermediate files could fill your HDD _very_ fast!
326 # Make sure you periodically empty your temp dir (usually: /tmp) or
327 # suffer the consequences.  You've been warned!!
328 #--
329 #focr_keep_bad_images 1
330
331 #################################################################
332 # DO NOT REMOVE THIS LINE, IT IS REQUIRED UNDER ALL CIRCUMSTANCES
333 focr_end_config
Note: See TracBrowser for help on using the browser.