cron-deja-vu-0.4/0000755000175000017500000000000011754231630012725 5ustar squatsquatcron-deja-vu-0.4/cron-deja-vu0000755000175000017500000001213611756031237015153 0ustar squatsquat#!/usr/bin/python # todo host aliases missing import logging import os import sys import email import copy import string from optparse import OptionParser import email.header import gdbm import mailbox import ConfigParser import re import hashlib filter_name = 'deja-vu' file_prefix = '.'+filter_name wanted_header_fields = set(['Content-Type','MIME-Version','Content-Transfer-Encoding','Subject','From','X-Mailer']) unwanted_header_fields = set(['X-'+filter_name+'-digest','X-'+filter_name,'X-'+filter_name+'-line']) unify_patterns = [] desc="""%prog is a filter for cron generated mail. it expects mails which only differ in small amounts. Matching is done, by matching line by line against already learned mails. If a mail is matched, the header X-deja-vu is set to yes. Otherwise the header is set to, no and if specified the header X-deja-vu-line will contain lines of the mail which did not match. 
""" def config_read(config): config = ConfigParser.RawConfigParser() try: config.read(options.config_filename) for key in config.options('unify'): unify_patterns.append(config.get('unify',key)) except: logging.info("no config file found, use default config") unify_patterns.append('[0123456789]') def clean_mail_header(msg): for key in unwanted_header_fields: if (key in msg.keys()): logging.debug("remove header field from mail: " + key) del msg[key] return msg def clean_mail(msg): msg = copy.deepcopy(msg) return clean_mail_header(msg) def unify_string(s): r = s for p in unify_patterns: r = re.sub(p,'x',r) return r def unify_header_line(header_line): intab = "\n\r\t " outtab = " " trantab = string.maketrans(intab, outtab) r = re.sub(' ','',string.translate(header_line,trantab)) # print "x "+ r + "y" return r def flatten_mail_header(msg): h = "" for key in wanted_header_fields: if (key in msg.keys()): h = h + unify_header_line(msg[key]) return h def flatten_mail_body(msg,recurse=0): # TODO error handling # failsafe if recurse>10: return "" if msg.is_multipart(): b = "" for mb in msg.get_payload(): b = b + flatten_mail_body(mb, recurse+1) return b else: return msg.get_payload() def flatten_mail(msg): return unify_string(flatten_mail_body(msg) + flatten_mail_header(msg)) parser = OptionParser(usage="%prog ", version="%prog 0.1",description=desc) parser.add_option("-m", "--mail", dest="mail_filename",metavar="", help="test against mail from filename") parser.add_option("-c", "--config", dest="config_filename",metavar="", help="config", default=os.getenv('HOME')+"/"+file_prefix+".cfg") parser.add_option("-a", "--add", dest="add",metavar="", help="build database from maildir") parser.add_option("-s", "--show", dest="show_header_lines",metavar="",type="int", help="show first of not matched lines in header") parser.add_option("-d", "--debug", dest="debug",action="store_true", help="show debug output") (options, args) = parser.parse_args() config="" config_read(config) 
db_filename = os.getenv('HOME')+'/'+file_prefix+'.dbm' if options.debug: logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.WARNING) if (options.add): db = gdbm.open(db_filename,'n') for message in mailbox.Maildir(options.add,factory=None): message_out = clean_mail_header(message) logging.debug("process mail msgid: " + message_out['Message-Id']) for line in flatten_mail(clean_mail(message_out)).splitlines(): db[hashlib.sha256(line).hexdigest()] = '1' db.close else: if options.mail_filename: f = open(options.mail_filename, "r") try: message = email.message_from_file(f) except: logging.error("file could not be read") sys.exit() f.close() else: try: message = email.message_from_file(sys.stdin) except: logging.error("pipe canceled") sys.exit() message_out = clean_mail_header(message) db = gdbm.open(db_filename,'r') match = True for line in flatten_mail(clean_mail(message_out)).splitlines(): if not hashlib.sha256(line).hexdigest() in db: logging.debug("not match:" + line) if options.show_header_lines: message_out['X-'+filter_name+'-line'] = line options.show_header_lines = options.show_header_lines - 1 match = False else: logging.debug(" match:" + line) if match: message_out['X-'+filter_name] = 'yes' else: message_out['X-'+filter_name] = 'no' logging.debug("------------ OUTPUT MAIL START ------------------") print message_out.as_string(False), logging.debug("------------ OUTPUT MAIL END ------------------") db.close # vim:set et: # vim:set ts=4: # vim:set shiftwidth=4: cron-deja-vu-0.4/deja-vu.cfg0000644000175000017500000000035311644330454014744 0ustar squatsquat# place into $HOME/.deja-vu.cfg # the regex below stating which texts should be unified [unify] regex_num = [0123456789] regex_weekday = (Mon|Tue|Wed|Thu|Fri|Sat|Sun) regex_month = (Jun|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) cron-deja-vu-0.4/cron-deja-vu.10000644000175000017500000000214611614525444015310 0ustar squatsquat.\" DO NOT MODIFY THIS FILE! 
.\" It was generated by help2man 1.40.4.
.TH CRON-DEJA-VU "1" "July 2011" "cron-deja-vu 0.1" "User Commands"
.SH NAME
cron-deja-vu \- mail filter designed to handle recurring cron mails
.SH SYNOPSIS
.B cron-deja-vu
\fI\fR
.SH DESCRIPTION
cron\-deja\-vu is a filter for cron generated mail. It expects mails which
differ only in small amounts. Matching is done line by line against already
learned mails. If a mail is matched, the header X\-deja\-vu is set to yes.
Otherwise the header is set to no and, if requested, the header
X\-deja\-vu\-line will contain lines of the mail which did not match.
.SH OPTIONS
.TP
\fB\-\-version\fR
show program's version number and exit
.TP
\fB\-h\fR, \fB\-\-help\fR
show this help message and exit
.TP
\fB\-m\fR , \fB\-\-mail=\fR
test against mail from filename
.TP
\fB\-a\fR , \fB\-\-add=\fR
build database from maildir
.TP
\fB\-s\fR , \fB\-\-show=\fR
show the first unmatched lines (up to the given number) in the header
.TP
\fB\-d\fR, \fB\-\-debug\fR
show debug output