#!/usr/bin/env python
# encoding=utf8
"""Filter routine noise out of a syslog file.

Every line of the input file is parsed into an :class:`Event` and compared
against a list of exclusion :class:`Pattern` rules.  Lines that no rule
matches are printed to stdout, followed by a few statistics.

The code deliberately sticks to constructs that behave the same on
Python 2 and Python 3 (single-argument ``print(...)`` calls, no f-strings).
"""

import operator
import re
import sys

__version__ = '0.1'

# 0: statistics summary only, >0: also per-program counts,
# >2: also per-pattern hit counts.
VERBOSITY = 1


class Event(object):
    """A single parsed log line.

    The expected layout is ``<timestamp> <host> <program>[<pid>]: <message>``
    where the ``[<pid>]`` part is optional, for example::

        2016-04-05T18:27:31.885916+02:00 fer.adora.dk sudo: pam_unix(...): ...

    Attributes:
        raw: the line exactly as passed in.
        timestamp: first space-separated field.
        timezone: last six characters of the timestamp (e.g. ``+02:00``).
        host: second space-separated field.
        program: program name without pid and without trailing colon.
        pid: pid as a string, or ``None`` when the line carries no ``[pid]``.
        message: everything after the third space (may be empty).
    """

    def __init__(self, line):
        """Parse *line*; terminate the process with status 2 if it has
        fewer than three space-separated fields."""
        self.raw = line

        # Split at most three times: the fourth piece is the free-form
        # message and must keep its own spaces untouched.
        parts = line.split(" ", 3)
        if len(parts) < 3:
            # Diagnostics go to stderr so they never mix with the filtered
            # log lines on stdout.
            sys.stderr.write("WARNING: Could not parse: %s\n" % line)
            sys.exit(2)

        self.timestamp = parts[0]
        self.timezone = parts[0][-6:]
        self.host = parts[1]

        tag = parts[2]
        pieces = tag.split("[")
        if len(pieces) > 1:
            # "CRON[1234]:" -> program "CRON", pid "1234"
            self.program = pieces[0]
            self.pid = pieces[1].split("]")[0]
        else:
            # No pid present ("sudo:"): the program ends at the colon.
            self.pid = None
            self.program = tag.split(":")[0]

        self.message = parts[3] if len(parts) > 3 else ""

    def __repr__(self):
        return "%(classname)s(timestamp='%(timestamp)s' timezone='%(timezone)s' host='%(host)s' program='%(program)s' pid='%(pid)s' message='%(message)s')" % {
            'classname': self.__class__.__name__,
            'timestamp': self.timestamp,
            'timezone': self.timezone,
            'host': self.host,
            'program': self.program,
            'pid': self.pid,
            'message': self.message,
        }


def _compile(pattern):
    """Return *pattern* compiled, or ``None`` when it is ``None`` or empty."""
    if pattern:
        return re.compile(pattern)
    return None


class Pattern(object):
    """A rule made of optional regular expressions, one per Event field.

    A field whose pattern is ``None`` or empty is unconstrained.  An event
    matches the rule when every constrained field is found (``re.search``)
    in the corresponding event attribute.

    Attributes:
        message, timestamp, timezone, host, program, pid: compiled regex
            for that field, or ``None`` when the field is unconstrained.
        match_<field>: result of the most recent evaluation of that field
            (always ``True`` for unconstrained fields).
        num_matches: number of events this rule has matched so far.
    """

    # Evaluation order used by match(): program first because almost every
    # rule constrains it, message last because it is the longest string.
    _FIELDS = ("program", "pid", "host", "timezone", "timestamp", "message")

    def __init__(self, message=None, timestamp=None, timezone=None, host=None, program=None, pid=None):
        self.message = _compile(message)
        self.timestamp = _compile(timestamp)
        self.timezone = _compile(timezone)
        self.host = _compile(host)
        self.program = _compile(program)
        self.pid = _compile(pid)

        # Unconstrained fields count as matching from the start.
        self.match_message = self.message is None
        self.match_timestamp = self.timestamp is None
        self.match_timezone = self.timezone is None
        self.match_host = self.host is None
        self.match_program = self.program is None
        self.match_pid = self.pid is None

        self.num_matches = 0

    def match(self, event):
        """Return ``True`` if *event* satisfies every constrained field.

        Evaluation stops at the first field that fails.  ``num_matches`` is
        incremented on success.  An event attribute that is ``None`` (e.g.
        ``pid`` on lines without one) never satisfies a constraint.
        """
        for field in self._FIELDS:
            regex = getattr(self, field)
            if regex is None:
                continue
            value = getattr(event, field)
            hit = value is not None and regex.search(value) is not None
            setattr(self, "match_" + field, hit)
            if not hit:
                return False

        self.num_matches += 1
        return True

    def __repr__(self):
        constrained = [
            "%s='%s'" % (field, getattr(self, field).pattern)
            for field in self._FIELDS
            if getattr(self, field) is not None
        ]
        return "%s(%s)" % (self.__class__.__name__, ", ".join(constrained))


def usage():
    """Write a usage line to stderr and exit with status 1."""
    sys.stderr.write("Usage: %s LOGFILE\n" % (sys.argv[0]))
    sys.exit(1)


def filter_line(e, exclude_patterns):
    """Return *e* if no pattern in *exclude_patterns* matches it, else ``None``.

    Patterns are tried in order and evaluation stops at the first match, so
    only that pattern's ``num_matches`` counter is incremented.
    """
    for ep in exclude_patterns:
        if ep.match(e):
            return None
    return e


def load_rules():
    """Return a fresh list of the built-in exclusion patterns."""
    return [
        # Routine cron activity
        Pattern(program='^(/USR/SBIN/CRON|CRON)$', message=r'^\(root\) CMD \(test -x /usr/sbin/anacron'),
        Pattern(program='^(/USR/SBIN/CRON|CRON)$', message=r'^\(root\) CMD \(invoke-rc.d atop _cron\)'),
        Pattern(program='^crontab$', message=r'^\(.+\) LIST \(.+'),

        # We do not care about messages from 'links'
        Pattern(program='^links$'),

        # Ignore common systemd messages
        Pattern(program='^systemd', message="^(Stopped|Stopping|Started|Starting|Reached|New|Removed|Startup|Received|Reloading|Reloaded|Created)"),

        # We do not want to hear about avahi
        Pattern(program='^avahi-daemon$'),

        # normal postfix operations
        Pattern(program="^postfix/smtpd$", message="^connect from "),
        Pattern(program="^postfix/smtpd$", message="SASL authentication failed"),
        Pattern(program="^postfix/smtp$", message="Temporary System Problem"),
        Pattern(program="^postfix/", message="relay=.*status=sent"),
        Pattern(program="^postfix/qmgr$"),
        Pattern(program="^postfix/cleanup$"),
        Pattern(program="^postfix/pickup$"),
        Pattern(program="^postfix/anvil$"),
        Pattern(program="^postfix/smtpd$", message="^disconnect from "),
        Pattern(program="^postfix/smtpd$", message="^warning: hostname .* Name or service not known"),
        Pattern(program="^postfix/smtpd$", message="^warning: hostname .* does not resolve"),
        Pattern(program="^postfix/smtpd$", message="^lost connection after .+"),
        Pattern(program="^postfix/smtpd$", message="^timeout after .+"),
        Pattern(program="^postfix/smtpd$", message="^NOQUEUE: reject:"),
        Pattern(program="^postfix/smtpd$", message="^too many errors after DATA from"),
        Pattern(program="^postfix/smtp$", message="Host or domain name not found."),
        Pattern(program="^postfix/smtp$", message="^connect to .* Network is unreachable$"),

        # Ignore MARKS
        Pattern(program='^--$', message='^MARK --$'),
    ]


def main(path=None):
    """Filter the log file at *path* and print the surviving lines.

    When *path* is omitted, the module-level global ``infile`` is used if it
    exists (older callers set it before calling ``main()``); otherwise the
    usage text is shown and the process exits.
    """
    if path is None:
        path = globals().get("infile")
    if path is None:
        usage()

    exclude_patterns = load_rules()

    num_lines_total = 0
    num_lines_excluded = 0
    num_lines_included = 0
    num_patterns = len(exclude_patterns)

    # Contains a key for each observed program string, value is the number
    # of times this program was observed.
    programs = {}

    with open(path) as f:
        for line in f:
            num_lines_total += 1
            event = Event(line.strip())
            programs[event.program] = programs.get(event.program, 0) + 1

            if filter_line(event, exclude_patterns):
                print(event.raw)
                num_lines_included += 1
            else:
                num_lines_excluded += 1

    # Only print stats if any lines were found, so a fully filtered log
    # produces no output at all.
    if num_lines_included:
        print(
            "\n"
            "Statistics:\n"
            "    num_lines_total    =%12s\n"
            "    num_lines_excluded =%12s\n"
            "    num_lines_included =%12s\n"
            "    num_patterns       =%12s\n"
            % (num_lines_total, num_lines_excluded, num_lines_included, num_patterns)
        )

        if VERBOSITY > 0:
            print("Programs:")
            by_count = sorted(programs.items(), key=operator.itemgetter(1))
            for p, num in reversed(by_count):
                print("%16s : %16s" % (p, num))
            print("%s programs in total" % len(programs))

        if VERBOSITY > 2:
            print("Patterns: ")
            for ep in exclude_patterns:
                if ep.message is not None:
                    print("%s %s" % (ep.num_matches, ep.message.pattern))
                else:
                    print("%s %s" % (ep.num_matches, ep))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        usage()
    main(sys.argv[1])