| 1 |
# Load the FuzzyOcr plugin from FuzzyOcr.pm (a relative path is resolved relative to this config file).
loadplugin FuzzyOcr FuzzyOcr.pm |
|---|
| 2 |
# Main rule: a body test evaluated by the plugin's fuzzyocr_check() function.
body FUZZY_OCR eval:fuzzyocr_check() |
|---|
| 3 |
describe FUZZY_OCR Mail contains an image with common spam text inside |
|---|
| 4 |
# The three rules below all evaluate dummy_check(). Presumably that function never matches by itself
# and the plugin registers hits for these rule names from within fuzzyocr_check() -- TODO confirm in FuzzyOcr.pm.
body FUZZY_OCR_WRONG_CTYPE eval:dummy_check() |
|---|
| 5 |
describe FUZZY_OCR_WRONG_CTYPE Mail contains an image with wrong content-type set |
|---|
| 6 |
body FUZZY_OCR_CORRUPT_IMG eval:dummy_check() |
|---|
| 7 |
describe FUZZY_OCR_CORRUPT_IMG Mail contains a corrupted image |
|---|
| 8 |
body FUZZY_OCR_KNOWN_HASH eval:dummy_check() |
|---|
| 9 |
describe FUZZY_OCR_KNOWN_HASH Mail contains an image with known hash |
|---|
| 10 |
|
|---|
| 11 |
# SpamAssassin runs tests in increasing priority order (default priority is 0), so a priority of 900
# makes FUZZY_OCR run after default-priority rules. Presumably this is what lets focr_autodisable_score
# compare against the score accumulated so far -- TODO confirm.
priority FUZZY_OCR 900 |
|---|
| 12 |
|
|---|
| 13 |
########### Plugin Configuration ############# |
|---|
| 14 |
|
|---|
| 15 |
#### Logging options ##### |
|---|
| 16 |
# Verbosity level (see manual) Attention: Don't set to 0, but to 0.0 for quiet operation. (Default value: 1) |
|---|
| 17 |
#focr_verbose 2 |
|---|
| 18 |
# |
|---|
| 19 |
# Logfile (make sure it is writable by the plugin) (Default value: /etc/mail/spamassassin/FuzzyOcr.log) |
|---|
| 20 |
#focr_logfile /etc/mail/spamassassin/FuzzyOcr.log |
|---|
| 21 |
########################## |
|---|
| 22 |
|
|---|
| 23 |
##### Wordlists ##### |
|---|
| 24 |
# Here we define the words to scan for (Default value: /etc/mail/spamassassin/FuzzyOcr.words) |
|---|
| 25 |
#focr_global_wordlist /etc/mail/spamassassin/FuzzyOcr.words |
|---|
| 26 |
# |
|---|
| 27 |
# This is the path RELATIVE to the respective home directory for the personalized list |
|---|
| 28 |
# This list is merged with the global word list on execution (Default value: .spamassassin/fuzzyocr.words) |
|---|
| 29 |
# If focr_personal_wordlist begins with '/', the option is treated as a fixed path and HOME is not searched |
|---|
| 30 |
#focr_personal_wordlist .spamassassin/fuzzyocr.words |
|---|
| 31 |
##################### |
|---|
| 32 |
|
|---|
| 33 |
# These parameters can be used to change other detection settings |
|---|
| 34 |
# If you leave these commented out, the defaults will be used. |
|---|
| 35 |
# Do not use " " around any parameters! |
|---|
| 36 |
# |
|---|
| 37 |
##### Location of helper applications (path + binary) (Default values: /usr/bin/<app>) ##### |
|---|
| 38 |
#focr_bin_giffix /usr/bin/giffix |
|---|
| 39 |
#focr_bin_giftext /usr/bin/giftext |
|---|
| 40 |
#focr_bin_gifinter /usr/bin/gifinter |
|---|
| 41 |
#focr_bin_giftopnm /usr/bin/giftopnm |
|---|
| 42 |
#focr_bin_jpegtopnm /usr/bin/jpegtopnm |
|---|
| 43 |
#focr_bin_pngtopnm /usr/bin/pngtopnm |
|---|
| 44 |
#focr_bin_bmptopnm /usr/bin/bmptopnm |
|---|
| 45 |
#focr_bin_ppmhist /usr/bin/ppmhist |
|---|
| 46 |
#focr_bin_gocr /usr/bin/gocr |
|---|
| 47 |
# |
|---|
| 48 |
#focr_path_bin /usr/local/netpbm/bin:/usr/local/bin:/usr/bin |
|---|
| 49 |
# |
|---|
| 50 |
############################################################################################ |
|---|
| 51 |
|
|---|
| 52 |
##### Scansets, comma separated (Default value: $gocr -i -, $gocr -l 180 -d 2 -i -) ##### |
|---|
| 53 |
# Each scanset consists of one or more commands which make text out of pnm input. |
|---|
| 54 |
# Each scanset is run separately on the PNM data, results are combined in scoring. |
|---|
| 55 |
# Active setting: three comma-separated scansets, each a single gocr run on the same input --
# one with gocr's default options, then two with -l 180 and -l 140 (gocr grey-level threshold),
# both combined with -d 2 (gocr dust size).
# $gocr and $pfile look like placeholders substituted by the plugin (gocr binary / PNM input file) -- TODO confirm.
# NOTE(review): the default quoted in the section header reads from stdin ("-i -"), not $pfile -- verify which form the plugin expects.
focr_scansets $gocr -i $pfile, $gocr -l 180 -d 2 -i $pfile, $gocr -l 140 -d 2 -i $pfile |
|---|
| 56 |
# |
|---|
| 57 |
# To use only one scan with default values, uncomment the next line instead |
|---|
| 58 |
#focr_scansets $gocr -i $pfile |
|---|
| 59 |
# |
|---|
| 60 |
# An example of a more advanced set |
|---|
| 61 |
# This one first uses the standard scan, then a scanset which first reduces the image to 3 colors and then scans it with custom settings |
|---|
| 62 |
# and then it scans again only with these custom settings |
|---|
| 63 |
# NOTE: This is for advanced users only, if you have questions how to use this, ask on the ML or on IRC |
|---|
| 64 |
#focr_scansets $gocr -i $pfile, pnmnorm $pfile 2>$efile | pnmquant 3 2>>$efile | pnmnorm 2>>$efile | $gocr -l 180 -d 2 -i -, $gocr -l 180 -d 2 -i $pfile |
|---|
| 65 |
######################################################################################### |
|---|
| 66 |
|
|---|
| 67 |
##### Various Score/Scan settings ##### |
|---|
| 68 |
# Timeout for the plugin, in seconds. (Maximum runtime of the plugin) (Default value: 10) |
|---|
| 69 |
#focr_timeout 15 |
|---|
| 70 |
# |
|---|
| 71 |
# Default detection threshold (see manual) (Default value: 0.3) (Can be changed on a per word basis in the wordlist). |
|---|
| 72 |
#focr_threshold 0.3 |
|---|
| 73 |
# |
|---|
| 74 |
# This is the score for a hit after focr_counts_required matches |
|---|
| 75 |
#focr_base_score 5 |
|---|
| 76 |
# |
|---|
| 77 |
# This is the additional score for every additional match after focr_counts_required matches (Default value: 1) |
|---|
| 78 |
#focr_add_score 0.375 |
|---|
| 79 |
# |
|---|
| 80 |
# This is the score to give for a wrong content-type (e.g. JPEG image but content type says GIF) (Default value: 1.5) |
|---|
| 81 |
#focr_wrongctype_score 1.5 |
|---|
| 82 |
# |
|---|
| 83 |
# This is the score to give for a corrupted image (This currently affects only GIF images) (Default value: 2.5) |
|---|
| 84 |
#focr_corrupt_score 2.5 |
|---|
| 85 |
# |
|---|
| 86 |
# This is the score to give for a corrupted unfixable image (This currently affects only GIF images) (Default value: 5) |
|---|
| 87 |
#focr_corrupt_unfixable_score 5 |
|---|
| 88 |
# |
|---|
| 89 |
# This is used to disable the OCR engine if the message already has more points than this value (Default value: 10) |
|---|
| 90 |
#focr_autodisable_score 20 |
|---|
| 91 |
# |
|---|
| 92 |
# Number of minimum matches before the rule scores (Default value: 2) |
|---|
| 93 |
#focr_counts_required 3 |
|---|
| 94 |
# |
|---|
| 95 |
# Specifies how many frames an animated GIF must contain for the second (less resource-consuming) animated-GIF test to be used. (Default value: 5) |
|---|
| 96 |
#focr_gif_max_frames 5 |
|---|
| 97 |
# |
|---|
| 98 |
# For animated-gif images: |
|---|
| 99 |
# Specifies the threshold for the delay of a frame in an animated-gif image (Default value: 100) |
|---|
| 100 |
# + If the delay is greater than the threshold, the frame is considered; otherwise, the frame is skipped. |
|---|
| 101 |
#focr_anim_delay 100 |
|---|
| 102 |
# |
|---|
| 103 |
# Specifies the number of frames to consider (Default value: 2) |
|---|
| 104 |
# + Keeps at least the largest N frames. |
|---|
| 105 |
#focr_anim_max_frames 2 |
|---|
| 106 |
# |
|---|
| 107 |
####################################### |
|---|
| 108 |
|
|---|
| 109 |
##### Image Hash Database settings (Experimental, disabled by default) ##### |
|---|
| 110 |
# |
|---|
| 111 |
# Set this to 1 or 2 to enable the Image Hash database feature (Default value: 0.0) |
|---|
| 112 |
# Value = 1 ... use digest_hash only |
|---|
| 113 |
# Value = 2 ... use digest_db w/digest_hash import |
|---|
| 114 |
#focr_enable_image_hashing 2 |
|---|
| 115 |
# |
|---|
| 116 |
# The score is saved with the hash in the database, so no extra scoring for a db hit is required. |
|---|
| 117 |
# |
|---|
| 118 |
# If the image hash database feature is enabled, specify the file here to use as database |
|---|
| 119 |
# (Default value: /etc/mail/spamassassin/FuzzyOcr.hashdb) |
|---|
| 120 |
#focr_digest_db /etc/mail/spamassassin/FuzzyOcr.hashdb |
|---|
| 121 |
# |
|---|
| 122 |
# If the image hash db feature is enabled, specify the file here to use as database |
|---|
| 123 |
# (Default value: /etc/mail/spamassassin/FuzzyOcr.db) |
|---|
| 124 |
#focr_db_hash /etc/mail/spamassassin/FuzzyOcr.db |
|---|
| 125 |
# |
|---|
| 126 |
# If the image hash db feature is enabled, specify the file here to use as database |
|---|
| 127 |
# (Default value: /etc/mail/spamassassin/FuzzyOcr.safe.db) |
|---|
| 128 |
#focr_db_safe /etc/mail/spamassassin/FuzzyOcr.safe.db |
|---|
| 129 |
# |
|---|
| 130 |
# Expire records from focr_digest_db after this many days (Default value: 35) |
|---|
| 131 |
#focr_db_max_days 15 |
|---|
| 132 |
# |
|---|
| 133 |
# Automatically add hashes of spam images recognized by OCR to the Image Hash database; to disable, set to 0.0 (Default value: 1) |
|---|
| 134 |
#focr_hashing_learn_scanned 1 |
|---|
| 135 |
# |
|---|
| 136 |
# Keep files that generate errors |
|---|
| 137 |
# 0 = always cleanup |
|---|
| 138 |
# 1 = keep only if error |
|---|
| 139 |
# 2 = always keep |
|---|
| 140 |
#focr_keep_bad_images 1 |
|---|
| 141 |
# |
|---|
| 142 |
# Score images whose global word count is below focr_counts_required using focr_add_score * word count as score. |
|---|
| 143 |
#focr_score_ham 1 |
|---|
| 144 |
###################################################################### |
|---|