Pastebin

Paste #6985: log_analyzer.py

< previous paste - next paste>

Pasted by tdn

Download View as text

#!/usr/bin/env python
# encoding=utf8
import sys
import re

# Version of this script.
__version__ = '0.1'

# Controls how much of the end-of-run summary main() prints: a value above 0
# adds the per-program line counts, a value above 2 also adds the number of
# matches recorded for each exclude pattern.
VERBOSITY = 1

class Event(object):
    """One log line split into its syslog-style fields.

    The line is split on single spaces and is expected to look like
    ``<timestamp> <host> <program>[<pid>]: <message>`` or, when the program
    carries no PID, ``<timestamp> <host> <program>: <message>``. Example:

    2016-04-05T18:27:31.885916+02:00 fer.adora.dk sudo: pam_unix(sudo:session): session opened for user root by root(uid=0)

    Attributes:
        raw: The line exactly as it was passed in.
        timestamp: The first field.
        timezone: The last six characters of the timestamp (``+02:00`` in
            the example above).
        host: The second field.
        program: The third field up to the first ``[`` or, when there is no
            ``[``, up to the first ``:``.
        pid: The text between ``[`` and ``]`` in the third field, as a
            string, or None when the third field contains no ``[``.
        message: All remaining fields joined by single spaces.
    """

    def __init__(self, line):
        """Parse *line*; terminate the process with status 2 if it has fewer than three fields."""
        self.raw = line
        parts = line.split(" ")
        if len(parts) < 3:
            # Diagnostics go to stderr so they never mix with the filtered
            # log lines that are written to stdout.
            sys.stderr.write("WARNING: Could not parse: %s\n" % line)
            sys.exit(2)

        self.timestamp = parts[0]
        self.timezone = parts[0][-6:]
        self.host = parts[1]

        tag = parts[2]
        pieces = tag.split("[")
        if len(pieces) > 1:
            self.program = pieces[0]
            self.pid = pieces[1].split("]")[0]
        else:
            # No PID present, so the program name ends at the ':' instead.
            self.pid = None
            self.program = tag.split(":")[0]

        self.message = " ".join(parts[3:])

    def __repr__(self):
        """Return a one-line dump of every parsed field."""
        template = (
            "%(classname)s(timestamp='%(timestamp)s' timezone='%(timezone)s' "
            "host='%(host)s' program='%(program)s' pid='%(pid)s' "
            "message='%(message)s')"
        )
        return template % {
            'classname': self.__class__.__name__,
            'timestamp': self.timestamp,
            'timezone': self.timezone,
            'host': self.host,
            'program': self.program,
            'pid': self.pid,
            'message': self.message,
        }


class Pattern(object):
    """A set of optional regular expressions tested against an event's fields.

    Each constructor argument is a regular expression source string for the
    event attribute of the same name. An argument that is None or empty puts
    no constraint on that attribute (an empty regex would match everything
    anyway). An event matches the pattern only when every supplied
    expression is found (``search`` semantics) in the corresponding
    attribute.

    Attributes:
        message, timestamp, timezone, host, program, pid: The compiled
            expression for that field, or None when the field is
            unconstrained.
        match_message, match_timestamp, ...: Result of the most recent test
            of that field; True for unconstrained fields.
        num_matches: Number of events this pattern has matched so far.
    """

    # Order in which fields are tested. The program name goes first so a
    # mismatch there rejects the event before any other regex is run.
    _FIELDS = ('program', 'pid', 'host', 'timezone', 'timestamp', 'message')

    def __init__(self, message=None, timestamp=None, timezone=None, host=None, program=None, pid=None):
        sources = {
            'message': message,
            'timestamp': timestamp,
            'timezone': timezone,
            'host': host,
            'program': program,
            'pid': pid,
        }
        for name in self._FIELDS:
            source = sources[name]
            regex = re.compile(source) if source else None
            setattr(self, name, regex)
            setattr(self, 'match_' + name, regex is None)
        self.num_matches = 0

    def match(self, event):
        """Return True if *event* satisfies every constrained field.

        *event* must expose the attributes ``program``, ``pid``, ``host``,
        ``timezone``, ``timestamp`` and ``message``. An attribute whose
        value is None (for instance the pid of a line that carried none)
        never satisfies a constraint. ``num_matches`` is incremented on
        every successful match.
        """
        for name in self._FIELDS:
            regex = getattr(self, name)
            if regex is None:
                continue
            value = getattr(event, name)
            found = value is not None and regex.search(value) is not None
            setattr(self, 'match_' + name, found)
            if not found:
                # One failed criterion is enough; skip the remaining regexes.
                return False
        self.num_matches += 1
        return True

    def __repr__(self):
        return "%s(message='%s')" % (self.__class__.__name__, self.message)


def usage():
    """Write the command-line synopsis to stderr and exit with status 1."""
    # sys.stderr.write works unchanged on Python 2 and 3, and keeps the
    # message out of stdout, which carries the filtered log lines.
    sys.stderr.write("Usage: %s <infile>\n" % (sys.argv[0]))
    sys.exit(1)

def filter_line(e, exclude_patterns):
    """Return *e* unless one of *exclude_patterns* matches it.

    Patterns are tried in order and testing stops at the first one whose
    ``match(e)`` is truthy; in that case None is returned.
    """
    excluded = any(pattern.match(e) for pattern in exclude_patterns)
    if excluded:
        return None
    return e


def load_rules():
    """Return a new list of the Pattern objects describing lines to exclude.

    All regular expressions are written as raw strings so that sequences
    such as ``\\(`` reach the regex engine without relying on Python passing
    unknown string escapes through (which newer interpreters warn about).
    """
    return [
        # Routine cron activity
        Pattern(program=r'^(/USR/SBIN/CRON|CRON)$', message=r'^\(root\) CMD \(test -x /usr/sbin/anacron'),
        Pattern(program=r'^(/USR/SBIN/CRON|CRON)$', message=r'^\(root\) CMD \(invoke-rc.d atop _cron\)'),
        Pattern(program=r'^crontab$', message=r'^\(.+\) LIST \(.+'),

        # We do not care about messages from 'links'
        Pattern(program=r'^links$'),

        # Ignore common systemd messages
        Pattern(program=r'^systemd', message=r"^(Stopped|Stopping|Started|Starting|Reached|New|Removed|Startup|Received|Reloading|Reloaded|Created)"),

        # We do not want to hear about avahi
        Pattern(program=r'^avahi-daemon$'),

        # normal postfix operations
        Pattern(program=r"^postfix/smtpd$", message=r"^connect from "),
        Pattern(program=r"^postfix/smtpd$", message=r"SASL authentication failed"),
        Pattern(program=r"^postfix/smtp$", message=r"Temporary System Problem"),
        Pattern(program=r"^postfix/", message=r"relay=.*status=sent"),
        Pattern(program=r"^postfix/qmgr$"),
        Pattern(program=r"^postfix/cleanup$"),
        Pattern(program=r"^postfix/pickup$"),
        Pattern(program=r"^postfix/anvil$"),
        Pattern(program=r"^postfix/smtpd$", message=r"^disconnect from "),
        Pattern(program=r"^postfix/smtpd$", message=r"^warning: hostname .* Name or service not known"),
        Pattern(program=r"^postfix/smtpd$", message=r"^warning: hostname .* does not resolve"),
        Pattern(program=r"^postfix/smtpd$", message=r"^lost connection after .+"),
        Pattern(program=r"^postfix/smtpd$", message=r"^timeout after .+"),
        Pattern(program=r"^postfix/smtpd$", message=r"^NOQUEUE: reject:"),
        Pattern(program=r"^postfix/smtpd$", message=r"^too many errors after DATA from"),
        Pattern(program=r"^postfix/smtp$", message=r"Host or domain name not found."),
        Pattern(program=r"^postfix/smtp$", message=r"^connect to .* Network is unreachable$"),

        # Ignore MARKS
        Pattern(program=r'^--$', message=r'^MARK --$'),
    ]


def main():
    """Filter the log file named by the module-level ``infile`` and report on it.

    Every line of the file is stripped and parsed into an Event. Events that
    no exclude pattern from load_rules() matches are printed to stdout in
    their raw form. If at least one line was printed, a statistics block
    follows; VERBOSITY above 0 adds the per-program line counts and
    VERBOSITY above 2 adds the match count of every exclude pattern.

    All output uses the single-argument ``print(...)`` form, which produces
    the same text under Python 2 and Python 3.
    """
    import operator

    exclude_patterns = load_rules()
    num_lines_total = 0
    num_lines_excluded = 0
    num_lines_included = 0
    num_patterns = len(exclude_patterns)

    # Contains a key for each observed program string, value is number of times this program was observed
    programs = {}

    with open(infile) as f:
        for line in f:
            num_lines_total += 1
            event = Event(line.strip())
            programs[event.program] = programs.get(event.program, 0) + 1

            if filter_line(event, exclude_patterns):
                print(event.raw)
                num_lines_included += 1
            else:
                num_lines_excluded += 1

    # Only print stats if any lines were found:
    if not num_lines_included:
        return

    print("""
    Statistics:
        num_lines_total     =%12s
        num_lines_excluded  =%12s
        num_lines_included  =%12s
        num_patterns        =%12s
        """ % (num_lines_total, num_lines_excluded, num_lines_included, num_patterns))

    if VERBOSITY > 0:
        print("Programs:")
        # reversed(sorted(...)) rather than sorted(reverse=True): the two
        # order programs with equal counts differently, and this keeps the
        # established output order.
        by_count = sorted(programs.items(), key=operator.itemgetter(1))
        for p, num in reversed(by_count):
            print("%16s : %16s" % (p, num))
        print("%s programs in total" % len(programs))

    if VERBOSITY > 2:
        print("Patterns: ")
        for ep in exclude_patterns:
            try:
                label = ep.message.pattern
            except AttributeError:
                # The pattern has no message regex; fall back to its repr.
                label = ep
            print("%s %s" % (ep.num_matches, label))

if __name__ == '__main__':
    # main() reads the input path from the module-level name ``infile``.
    # usage() prints the synopsis and exits, so the lines after it only run
    # when a path was actually given on the command line.
    if len(sys.argv) < 2:
        usage()
    infile = sys.argv[1]
    main()

New Paste


Do not write anything in this field if you're a human.

Go to most recent paste.