#!/usr/bin/env python
# encoding=utf8
# Source: Pastebin paste #6985, log_analyzer.py (pasted by tdn).
import sys
import re
__version__ = '0.1'
VERBOSITY = 1
class Event:
    """A single log line broken into its syslog fields.

    The line is split on single spaces. The first field is the timestamp
    (its last six characters are kept separately as the timezone), the
    second is the host, the third is the program tag (``program[pid]:`` or
    ``program:``) and everything after that is the message.

    A line with fewer than three space-separated fields cannot be parsed:
    a warning is printed and the process exits with status 2.
    """

    def __init__(self, line):
        self.raw = line
        fields = line.split(" ")
        if len(fields) < 3:
            print("WARNING: Could not parse: %s" % line)
            sys.exit(2)

        stamp, self.host, tag = fields[0], fields[1], fields[2]
        self.timestamp = stamp
        self.timezone = stamp[-6:]

        tag_pieces = tag.split("[")
        if len(tag_pieces) > 1:
            # Tag looks like "program[pid]:".
            self.pid = tag_pieces[1].split("]")[0]
            self.program = tag_pieces[0]
        else:
            # If program contains no PID, we need to split by ':' instead. Example:
            # 2016-04-05T18:27:31.885916+02:00 fer.adora.dk sudo: pam_unix(sudo:session): session opened for user root by root(uid=0)
            self.pid = None
            self.program = tag.split(":")[0]

        self.message = " ".join(fields[3:])

    def __repr__(self):
        attribute_names = ('timestamp', 'timezone', 'host', 'program', 'pid', 'message')
        values = dict((name, getattr(self, name)) for name in attribute_names)
        values['classname'] = self.__class__.__name__
        return "%(classname)s(timestamp='%(timestamp)s' timezone='%(timezone)s' host='%(host)s' program='%(program)s' pid='%(pid)s' message='%(message)s')" % values
class Pattern(object):
    """A set of regular expressions that an event must satisfy to match.

    Each criterion (``message``, ``timestamp``, ``timezone``, ``host``,
    ``program``, ``pid``) is an optional regular expression searched for in
    the event attribute of the same name. A criterion that is ``None`` or
    empty places no constraint on the event. An event matches only when
    every given criterion matches.

    Attributes:
        message, timestamp, timezone, host, program, pid: the compiled
            regular expression for that criterion, or ``None`` when the
            criterion was not given.
        match_message, match_timestamp, ...: outcome of the most recent
            evaluation of that criterion; always ``True`` for criteria that
            were not given. Evaluation stops at the first failing
            criterion, so later flags may be left over from an earlier
            event.
        num_matches: number of events this pattern has matched so far.
    """

    # Evaluation order. The program name goes first so that most
    # non-matching events are rejected before the message regex is run.
    _FIELDS = ('program', 'pid', 'host', 'timezone', 'timestamp', 'message')

    def __init__(self, message=None, timestamp=None, timezone=None, host=None, program=None, pid=None):
        expressions = {
            'message': message,
            'timestamp': timestamp,
            'timezone': timezone,
            'host': host,
            'program': program,
            'pid': pid,
        }
        for field in self._FIELDS:
            expression = expressions[field]
            # Every criterion is handled the same way: compiled when given,
            # None otherwise. The flag is derived from the compiled value so
            # an empty expression means "unconstrained", not "never matches".
            compiled = re.compile(expression) if expression else None
            setattr(self, field, compiled)
            setattr(self, 'match_' + field, compiled is None)
        self.num_matches = 0

    def match(self, event):
        """Return True when ``event`` satisfies every given criterion.

        ``event`` must expose the attributes ``timestamp``, ``timezone``,
        ``host``, ``program``, ``pid`` and ``message``. An attribute that is
        ``None`` (for example the pid of a line without one) never satisfies
        a criterion. ``num_matches`` is incremented on a successful match.
        """
        for field in self._FIELDS:
            regex = getattr(self, field)
            if regex is None:
                continue
            value = getattr(event, field)
            hit = value is not None and regex.search(value) is not None
            setattr(self, 'match_' + field, hit)
            if not hit:
                # One failed criterion decides the result; skip the rest.
                return False
        self.num_matches += 1
        return True

    def __repr__(self):
        return "%s(message='%s')" % (self.__class__.__name__, self.message)
def usage():
    """Print the command-line synopsis and exit with status 1."""
    synopsis = "Usage: %s <infile>" % sys.argv[0]
    print(synopsis)
    sys.exit(1)
def filter_line(e, exclude_patterns):
    """Return ``e`` unless one of ``exclude_patterns`` matches it.

    Patterns are tried in order and evaluation stops at the first one whose
    ``match(e)`` is true; in that case ``None`` is returned.
    """
    if any(rule.match(e) for rule in exclude_patterns):
        return None
    return e
def load_rules():
    """Return the list of Pattern objects describing log lines to exclude.

    Each rule is a (program regex, message regex) pair; a message of None
    means any message from that program is excluded.
    """
    cron = '^(/USR/SBIN/CRON|CRON)$'
    smtpd = "^postfix/smtpd$"
    smtp = "^postfix/smtp$"

    rules = [
        (cron, r'^\(root\) CMD \(test -x /usr/sbin/anacron'),
        (cron, r'^\(root\) CMD \(invoke-rc.d atop _cron\)'),
        ('^crontab$', r'^\(.+\) LIST \(.+'),
        # We do not care about messages from 'links'
        ('^links$', None),
        # Ignore common systemd messages
        ('^systemd', "^(Stopped|Stopping|Started|Starting|Reached|New|Removed|Startup|Received|Reloading|Reloaded|Created)"),
        # We do not want to hear about avahi
        ('^avahi-daemon$', None),
        # normal postfix operations
        (smtpd, "^connect from "),
        (smtpd, "SASL authentication failed"),
        (smtp, "Temporary System Problem"),
        ("^postfix/", "relay=.*status=sent"),
        ("^postfix/qmgr$", None),
        ("^postfix/cleanup$", None),
        ("^postfix/pickup$", None),
        ("^postfix/anvil$", None),
        (smtpd, "^disconnect from "),
        (smtpd, "^warning: hostname .* Name or service not known"),
        (smtpd, "^warning: hostname .* does not resolve"),
        (smtpd, "^lost connection after .+"),
        (smtpd, "^timeout after .+"),
        (smtpd, "^NOQUEUE: reject:"),
        (smtpd, "^too many errors after DATA from"),
        (smtp, "Host or domain name not found."),
        (smtp, "^connect to .* Network is unreachable$"),
        # Ignore MARKS
        ('^--$', '^MARK --$'),
    ]
    return [Pattern(program=program, message=message) for program, message in rules]
def main():
    """Filter the log file named by the module-level ``infile``.

    Every line is parsed into an Event. Lines that no exclusion pattern
    matches are printed unchanged. If at least one line was printed, a
    statistics summary follows; VERBOSITY > 0 adds per-program line counts
    and VERBOSITY > 2 adds per-pattern match counts.
    """
    exclude_patterns = load_rules()
    num_lines_total = 0
    num_lines_excluded = 0
    num_lines_included = 0
    num_patterns = len(exclude_patterns)
    # Contains a key for each observed program string, value is number of times this program was observed
    programs = {}

    with open(infile) as f:
        for line in f:
            num_lines_total += 1
            event = Event(line.strip())
            programs[event.program] = programs.get(event.program, 0) + 1
            if filter_line(event, exclude_patterns):
                print(event.raw)
                num_lines_included += 1
            else:
                num_lines_excluded += 1

    # Only print stats if any lines were found:
    # NOTE(review): the pasted source lost its indentation; the verbosity
    # sections below are assumed to belong under this check - confirm.
    if not num_lines_included:
        return

    print("""
Statistics:
num_lines_total =%12s
num_lines_excluded =%12s
num_lines_included =%12s
num_patterns =%12s
""" % (num_lines_total, num_lines_excluded, num_lines_included, num_patterns))

    if VERBOSITY > 0:
        print("Programs:")
        import operator
        programs_total = 0
        # Most frequently seen programs first.
        for p, num in reversed(sorted(programs.items(), key=operator.itemgetter(1))):
            programs_total += 1
            print("%16s : %16s" % (p, num))
        print("%s programs in total" % programs_total)

    if VERBOSITY > 2:
        print("Patterns: ")
        for ep in exclude_patterns:
            # Patterns without a message regex are shown by their repr; an
            # explicit check is used so unrelated errors are not swallowed.
            if ep.message is not None:
                print("%s %s" % (ep.num_matches, ep.message.pattern))
            else:
                print("%s %s" % (ep.num_matches, ep))
if __name__ == '__main__':
    # The log file to analyze is the first command-line argument; without
    # it, usage() prints the synopsis and exits.
    if len(sys.argv) < 2:
        usage()
    infile = sys.argv[1]
    main()
# (Pastebin page footer: "New Paste" / "Go to most recent paste.")