deb-build/root/lib/systemd/system/aminer.service:

[Unit]
Description=AMiner log data mining server
Documentation=man:AMiner(1)

[Service]
Type=simple
ExecStart=/usr/lib/logdata-anomaly-miner/AMiner --Foreground
KillMode=control-group
# Write everything to /dev/null: if AMiner is misconfigured, it
# may detect anomalies in its own log data, thus creating a logging
# loop. You may prefer logging to journal only, which needs journald
# to be reconfigured with "ForwardToSyslog=false".
StandardOutput=null

[Install]
WantedBy=multi-user.target

deb-build/root/usr/lib/logdata-anomaly-miner/AMinerRemoteControl:

#!/usr/bin/python3 -BbbEIsSttW all
"""This tool allows to connect to a remote control socket, send
requests and retrieve the responses. To allow remote use of this
tool, e.g. via SSH forwarding, the remote control address can be
set on the command line; no configuration file is read."""

import sys
# Get rid of the default sys path immediately. Otherwise Python
# also attempts to load the following imports from e.g. the directory
# where this binary resides.
sys.path = sys.path[1:]+['/usr/lib/logdata-anomaly-miner']

import json
import os
import socket
import traceback

from aminer.AnalysisChild import AnalysisChildRemoteControlHandler

remoteControlSocketName = None
remoteControlData = None
argPos = 1
commandList = []
stringResponseFlag = False
while argPos < len(sys.argv):
  paramName = sys.argv[argPos]
  argPos += 1
  if paramName == '--ControlSocket':
    if remoteControlSocketName is not None:
      print('%s: %s parameter given twice' % (sys.argv[0], paramName))
      sys.exit(1)
    remoteControlSocketName = sys.argv[argPos]
    argPos += 1
    continue
  if paramName == '--Data':
    remoteControlData = json.loads(sys.argv[argPos])
    argPos += 1
    continue
  if paramName == '--Exec':
    commandList.append((sys.argv[argPos].encode(), remoteControlData))
    argPos += 1
    continue
  if paramName == '--ExecFile':
    if not os.path.exists(sys.argv[argPos]):
      print('File %s does not exist' % sys.argv[argPos])
      sys.exit(1)
    execData = None
    with open(sys.argv[argPos], 'rb') as execFile:
      execData = execFile.read()
    commandList.append((execData, remoteControlData))
    argPos += 1
    continue
  if paramName == '--Help':
    if len(sys.argv) != 2:
      print('Ignoring all other arguments with --Help')
    print("""Usage: %s [arguments]
  --ControlSocket [socketpath]: when given, use nonstandard control socket.
  --Data [data]: provide this json serialized data within execution
    environment as 'remoteControlData' (see man page).
  --Exec [command]: add command to the execution list, can be used
    more than once.
  --ExecFile [file]: add commands from file to the execution list in
    same way as if content would have been used with "--Exec".
--Help: this output --StringResponse: if set, print the response just as string instead of passing it to repr.""" % sys.argv[0]) sys.exit(0) if paramName == '--StringResponse': stringResponseFlag = True continue print('Unknown parameter "%s", use --Help for overview' % paramName) sys.exit(1) if remoteControlSocketName is None: remoteControlSocketName = '/var/run/aminer-remote.socket' if not commandList: print('No commands given, use --Exec [cmd]') sys.exit(1) remoteControlSocket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) try: remoteControlSocket.connect(remoteControlSocketName) except socket.error as connectException: print('Failed to connect to socket %s, AMiner ' \ 'might not be running or remote control is disabled in ' \ 'configuration: %s' % (remoteControlSocketName, str(connectException))) sys.exit(1) remoteControlSocket.setblocking(1) controlHandler = AnalysisChildRemoteControlHandler(remoteControlSocket) for remoteControlCode, remoteControlData in commandList: controlHandler.putExecuteRequest(remoteControlCode, remoteControlData) # Send data until we are ready for receiving. while not controlHandler.mayReceive(): controlHandler.doSend() while not controlHandler.mayGet(): controlHandler.doReceive() requestData = controlHandler.doGet() requestType = requestData[4:8] if requestType == b'RRRR': try: remoteData = json.loads(requestData[8:]) if remoteData[0] != None: print('Remote execution exception:\n%s' % remoteData[0]) if stringResponseFlag: print('Remote execution response: %s' % str(remoteData[1])) else: print('Remote execution response: %s' % repr(remoteData[1])) except: print('Failed to process response %s' % repr(requestData)) traceback.print_exc() else: raise Exception('Invalid request type %s' % repr(requestType)) remoteControlSocket.close() deb-build/root/usr/lib/logdata-anomaly-miner/aminer/0000755000000000000000000000000013354643774021421 5ustar rootrootdeb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/0000755000000000000000000000000013354623223023047 5ustar rootrootdeb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/IpAddressDataModelElement.py0000600000000000000000000000327213351165351030360 0ustar rootroot"""This module defines a model element that represents an IP address.""" from aminer.parsing.MatchElement import MatchElement class IpAddressDataModelElement: """This class defines a model element that matches an IPv4 IP address.""" def __init__(self, elementId): """Create an element to match IPv4 IP addresses.""" self.elementId = elementId def getChildElements(self): """Get all possible child model elements of this element. @return None as there are no children of this element.""" return None def getMatchElement(self, path, matchContext): """Read an IP address at the current data position. When found, the matchObject will be """ data = matchContext.matchData numberCount = 0 digitCount = 0 matchLen = 0 extractedAddress = 0 for testByte in data: matchLen += 1 if testByte in b'0123456789': digitCount += 1 continue if digitCount == 0: return None ipBits = int(data[matchLen-digitCount-1:matchLen-1]) if ipBits > 0xff: return None extractedAddress = (extractedAddress << 8)|ipBits digitCount = 0 numberCount += 1 if numberCount == 4: # We are now after the first byte not belonging to the IP. 
So # go back one step matchLen -= 1 break if testByte != ord(b'.'): return None if digitCount != 0: ipBits = int(data[matchLen-digitCount:matchLen]) if ipBits > 0xff: return None extractedAddress = (extractedAddress << 8)|ipBits matchString = data[:matchLen] matchContext.update(matchString) return MatchElement("%s/%s" % (path, self.elementId), \ matchString, extractedAddress, None) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/MatchElement.py0000600000000000000000000000633413354621576025776 0ustar rootroot"""This module provides only the MatchElement class to store results from parser element matching process.""" class MatchElement(object): """This class allows storage and handling of data related to a match found by a model element.""" def __init__(self, path, matchString, matchObject, children): """Initialize the MatchElement. @param path when None, this element is anonymous. Hence it cannot be added to the result data and cannot have children. @param matchString the part of the input bytes string covered by the given match. @param matchObject the matchString converted to an object for matchers detecting more complex data types, e.g., integer numbers or IP addresses.""" if (path is None) and children: raise Exception("Anonymous match may not have children") self.path = path self.matchString = matchString self.matchObject = matchObject self.children = children def getPath(self): """Get the path of this element. @return the path string.""" return self.path def getMatchString(self): """Get the logatom string part this match element is matching.""" return self.matchString def getMatchObject(self): """Get the matched data converted to an object of suitable type.""" return self.matchObject def getChildren(self): """Get the submatch children of this match, if any. @return a list of submatches or None""" return self.children def annotateMatch(self, indentStr): """Annotate a given match element showing the match path elements and the parsed values. @param indentStr if None, all elements are separated just with a single space, no matter how deep the nesting level of those elements is. If not None, all elements are put into an own lines, that is prefixed by the given indentStr and indenting is increased by two spaces for earch level.""" nextIndent = None result = None if indentStr is None: result = '%s: %s (%s)' % (self.path, repr(self.matchObject), repr(self.matchString)) else: result = '%s%s: %s (%s)' % (indentStr, self.path, repr(self.matchObject), \ repr(self.matchString)) nextIndent = indentStr+' ' if self.children != None: for childMatch in self.children: if nextIndent is None: result += ' '+childMatch.annotateMatch(None) else: result += '\n'+childMatch.annotateMatch(nextIndent) return result def serializeObject(self): """Create a serialization of this match element and all the children. 
With sane and unique path elements, the serialized object will also be unique.""" chld = [] if self.children: for childMatch in self.children: chld.append(childMatch.serializeObject()) return { "path": self.path, "matchobject": self.matchObject, "matchString": self.matchString, "children": chld} def __str__(self): """Get a string representation of this match element excluding the children""" numChildren = 0 if self.children != None: numChildren = len(self.children) return 'MatchElement: path = %s, string = %s, object = %s, children = %d' % ( self.path, repr(self.matchString), repr(self.matchObject), numChildren) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/__init__.py0000600000000000000000000000542313352705645025163 0ustar rootroot"""This module defines various interfaces for log atom parsing and namespace shortcuts to the ModelElements.""" class ModelElementInterface(object): """This is the superinterface of all model elements.""" def getId(self): """Get the element ID.""" raise Exception('Interface method called') def getChildElements(self): """Get all possible child model elements of this element. If this element implements a branching model element, then not all child element IDs will be found in matches produced by getMatchElement. @return a list with all children""" raise Exception('Interface method called') def getMatchElement(self, path, matchContext): """Try to find a match on given data for this model element and all its children. When a match is found, the matchContext is updated accordingly. @param path the model path to the parent model element invoking this method. @param matchContext an instance of MatchContext class holding the data context to match against. @return the matchElement or None if model did not match.""" from aminer.parsing.AnyByteDataModelElement import AnyByteDataModelElement from aminer.parsing.Base64StringModelElement import Base64StringModelElement from aminer.parsing.DateTimeModelElement import DateTimeModelElement from aminer.parsing.DebugModelElement import DebugModelElement from aminer.parsing.DecimalFloatValueModelElement import DecimalFloatValueModelElement from aminer.parsing.DecimalIntegerValueModelElement import DecimalIntegerValueModelElement from aminer.parsing.DelimitedDataModelElement import DelimitedDataModelElement from aminer.parsing.ElementValueBranchModelElement import ElementValueBranchModelElement from aminer.parsing.FirstMatchModelElement import FirstMatchModelElement from aminer.parsing.FixedDataModelElement import FixedDataModelElement from aminer.parsing.FixedWordlistDataModelElement import FixedWordlistDataModelElement from aminer.parsing.HexStringModelElement import HexStringModelElement from aminer.parsing.IpAddressDataModelElement import IpAddressDataModelElement from aminer.parsing.MatchContext import DebugMatchContext from aminer.parsing.MatchContext import MatchContext from aminer.parsing.MatchElement import MatchElement from aminer.parsing.MultiLocaleDateTimeModelElement import MultiLocaleDateTimeModelElement from aminer.parsing.OptionalMatchModelElement import OptionalMatchModelElement from aminer.parsing.ParserMatch import ParserMatch from aminer.parsing.RepeatedElementDataModelElement import RepeatedElementDataModelElement from aminer.parsing.SequenceModelElement import SequenceModelElement from aminer.parsing.VariableByteDataModelElement import VariableByteDataModelElement from aminer.parsing.WhiteSpaceLimitedDataModelElement import WhiteSpaceLimitedDataModelElement 
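
The interface documented above is easiest to see in use: model elements are composed into a tree and driven by a MatchContext that holds the raw bytes still to be parsed. The following minimal sketch applies the classes defined in this package to one log fragment; the element IDs, the root path string 'model' and the sample byte string are made up for illustration and assume the classes behave as defined in the surrounding files.

from aminer.parsing.DecimalIntegerValueModelElement import DecimalIntegerValueModelElement
from aminer.parsing.FixedDataModelElement import FixedDataModelElement
from aminer.parsing.MatchContext import MatchContext
from aminer.parsing.SequenceModelElement import SequenceModelElement

# Model for hypothetical log fragments like b'connections: 42 open'.
model = SequenceModelElement('conn', [
    FixedDataModelElement('label', b'connections: '),
    DecimalIntegerValueModelElement('count')])

context = MatchContext(b'connections: 42 open')
match = model.getMatchElement('model', context)
if match is not None:
  # Prints the match tree, e.g. the path 'model/conn/count' with value 42.
  print(match.annotateMatch(''))
  # context.matchData now holds the unmatched remainder, here b' open'.

Because SequenceModelElement restores matchContext.matchData when any child fails, the same context can simply be handed to the next candidate model, which is how FirstMatchModelElement branches between alternatives.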
deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/MultiLocaleDateTimeModelElement.py0000600000000000000000000004550413352420605031540 0ustar rootroot"""This module defines a model element representing date or datetime from sources with different locales.""" import datetime import locale import sys from aminer.parsing.MatchElement import MatchElement class MultiLocaleDateTimeModelElement: """This class defines a model element to parse date or datetime values from log sources containing timestamps encoded in different locales or on machines, where host/service locale does not match data locale(s). CAVEAT: Unlike other model elements, this element is not completely stateless! As parsing of semiqualified date values without any year information may produce wrong results, e.g. wrong year or 1 day off due to incorrect leap year handling, this object will keep track of the most recent timestamp parsed and will use it to regain information about the year in semiqualified date values. Still this element will not complain when parsed timestamp values are not strictly sorted, this should be done by filtering modules later on. The sorting requirements here are only, that each new timestamp value may not be more than 2 days before and 1 month after the most recent one observer. Internal operation: * When creating the object, make sure that there are no ambiguous dateFormats in the list, e.g. one with "day month" and another one with "month day". * To avoid decoding of binary input data in all locales before searching for e.g. month names, convert all possible month names to bytes during object creation and just keep the lookup list.""" def __init__(self, elementId, dateFormats, startYear=None): """Create a new MultiLocaleDateTimeModelElement object. @param dateFormats this parameter is a list of tuples, each tuple containing information about one date format to support. The tuple structure is (formatString, formatLocale, formatTimezone). The formatString may contain the same elements as supported by strptime from datetime.datetime. The formatLocale defines the locale for the string content, e.g. de_DE for german, but also the data IO encoding, e.g. ISO-8859-1. The locale information has to be available, e.g. using "locale-gen" on Debian systems. The formatTimezone can be used to define the timezone of the timestamp parsed. When None, UTC is used. The timezone support may only be sufficient for very simple usecases, e.g. all data from one source configured to create timestamps in that timezone. This may still fail, e.g. when daylight savings changes make timestamps ambiguous during a short window of time. In all those cases, timezone should be left empty here and a separate filtering component should be used to apply timestamp corrections afterwards. See the FIXME-Filter component for that. Also having the same formatString for two different timezones will result in an error as correct timezone to apply cannot be distinguished just from format. @param startYear when given, parsing will use this year value for semiqualified timestamps to add correct year information. This is especially relevant for historic datasets as otherwise leap year handling may fail. The startYear parameter will only take effect when the first timestamp to be parsed by this object is also semiqualified. Otherwise the year information is extracted from this record. 
When empty and first parsing invocation involves a semiqualified date, the current year in UTC timezone is used.""" self.elementId = elementId self.startYear = startYear # The latest parsed timestamp value. self.latestParsedTimestamp = None self.totalSecondsStartTime = datetime.datetime(1970, 1, 1) self.dateFormats = DateFormatComponent(-1, None, -1, None, None) defaultLocale = locale.getlocale() # Build a decision tree for all format variants describing how # to analyze a given timestamp. The tree is created containing # nodes of form (separator, digitsOnlyFlag, length) for formatString, formatLocale, formatTimezone in dateFormats: self.dateFormats.addFormat(formatString, formatLocale, formatTimezone) # Restore previous locale settings. There seems to be no way in # python to get back to the exact same state. Hence perform the # reset only when locale has changed. This would also change the # locale from (None, None) to some system-dependent locale. if locale.getlocale() != defaultLocale: locale.resetlocale() def getChildElements(self): """Get all possible child model elements of this element. @return empty list as there are no children of this element.""" return None def getMatchElement(self, path, matchContext): """This method checks if the data to match within the content is suitable to be parsed by any of the supplied date formats. @return On match return a matchObject containing a tuple of the datetime object and the seconds since 1970. When not matching, None is returned. When the timestamp data parsed would be far off from the last ones parsed, so that correction may not be applied correctly, then the method will also return None.""" # Convert the head of the matchData to a timestamp value. parsedData = self.dateFormats.parse(matchContext.matchData, 0) if parsedData is None: return None parsedFields = parsedData[0] timeZoneInfo = parsedData[2] dateStr = matchContext.matchData[0:parsedData[1]] if parsedFields[COMPONENT_TYPE_MICROSECOND] is None: parsedFields[COMPONENT_TYPE_MICROSECOND] = 0 # FIXME: Values without day/month not handled yet parsedValue = None if parsedFields[COMPONENT_TYPE_YEAR] is None: if self.latestParsedTimestamp is not None: parsedFields[COMPONENT_TYPE_YEAR] = self.latestParsedTimestamp.year elif self.startYear is not None: parsedFields[COMPONENT_TYPE_YEAR] = self.startYear else: parsedFields[COMPONENT_TYPE_YEAR] = datetime.datetime.utcnow().year # Around new year, the year correction could change a semiqualified # date to the beginning of the year or could change a semiqualified # date lagging behind the latest date seen to the end of the following # year. 
parsedValue = datetime.datetime(parsedFields[COMPONENT_TYPE_YEAR], \ parsedFields[COMPONENT_TYPE_MONTH], \ parsedFields[COMPONENT_TYPE_DAY], \ parsedFields[COMPONENT_TYPE_HOUR], \ parsedFields[COMPONENT_TYPE_MINUTE], \ parsedFields[COMPONENT_TYPE_SECOND], \ parsedFields[COMPONENT_TYPE_MICROSECOND], \ timeZoneInfo) if not self.checkTimestampValueInRange(): parsedValue = datetime.datetime(parsedFields[COMPONENT_TYPE_YEAR]+1, \ parsedFields[COMPONENT_TYPE_MONTH], \ parsedFields[COMPONENT_TYPE_DAY], \ parsedFields[COMPONENT_TYPE_HOUR], \ parsedFields[COMPONENT_TYPE_MINUTE], \ parsedFields[COMPONENT_TYPE_SECOND], \ parsedFields[COMPONENT_TYPE_MICROSECOND], \ timeZoneInfo) if not self.checkTimestampValueInRange(): parsedValue = datetime.datetime(parsedFields[COMPONENT_TYPE_YEAR]-1, \ parsedFields[COMPONENT_TYPE_MONTH], \ parsedFields[COMPONENT_TYPE_DAY], \ parsedFields[COMPONENT_TYPE_HOUR], \ parsedFields[COMPONENT_TYPE_MINUTE], \ parsedFields[COMPONENT_TYPE_SECOND], \ parsedFields[COMPONENT_TYPE_MICROSECOND], \ timeZoneInfo) if not self.checkTimestampValueInRange(): print('Delta to last timestamp out of range for %s' % repr(dateStr), file=sys.stderr) return None self.checkTimestampValueInRange() if self.latestParsedTimestamp is not None: delta = (self.latestParsedTimestamp-self.latestParsedTimestamp) deltaSeconds = (delta.days*86400+delta.seconds+delta.microseconds) if (deltaSeconds < -86400) or (deltaSeconds > 86400*30): print('Delta to last timestamp out of range for %s' % repr(dateStr), file=sys.stderr) return None else: parsedValue = datetime.datetime(parsedFields[COMPONENT_TYPE_YEAR], \ parsedFields[COMPONENT_TYPE_MONTH], \ parsedFields[COMPONENT_TYPE_DAY], \ parsedFields[COMPONENT_TYPE_HOUR], \ parsedFields[COMPONENT_TYPE_MINUTE], \ parsedFields[COMPONENT_TYPE_SECOND], \ parsedFields[COMPONENT_TYPE_MICROSECOND], \ timeZoneInfo) if not self.checkTimestampValueInRange(): print('Delta to last timestamp out of range for %s' % repr(dateStr), file=sys.stderr) return None parsedValue = parsedValue.replace(tzinfo=None) matchContext.update(dateStr) delta = (parsedValue-self.totalSecondsStartTime) totalSeconds = (delta.days*86400+delta.seconds+delta.microseconds) if (self.latestParsedTimestamp is None) or (self.latestParsedTimestamp < parsedValue): self.latestParsedTimestamp = parsedValue return MatchElement("%s/%s" % (path, self.elementId), dateStr, (parsedValue, totalSeconds,), \ None) def checkTimestampValueInRange(self): """Return True if value is None.""" if self.latestParsedTimestamp is None: return True delta = (self.latestParsedTimestamp-self.latestParsedTimestamp) deltaSeconds = (delta.days*86400+delta.seconds+delta.microseconds) return (deltaSeconds >= -86400) and (deltaSeconds < 86400*30) COMPONENT_TYPE_YEAR = 0 COMPONENT_TYPE_MONTH = 1 COMPONENT_TYPE_DAY = 2 COMPONENT_TYPE_HOUR = 3 COMPONENT_TYPE_MINUTE = 4 COMPONENT_TYPE_SECOND = 5 COMPONENT_TYPE_MICROSECOND = 6 COMPONENT_TYPE_LENGTH = 7 class DateFormatComponent: """This class defines a component in the date format.""" def __init__(self, componentType, endSeparator, componentLength, translationDictionary, parentComponent): """Create the component object. @param endSeparator when not none, this component is separated from the next by the given separator. @param componentLength length of component for fixed length components, 0 otherwise. @param translationDictionary a dictionary describing how the bytes of a formatted date component should be translated into a number by plain lookup. 
When None, the component will be treated as normal number.""" self.componentType = componentType if (endSeparator is not None) and not endSeparator: raise Exception('Invalid zero-length separator string') self.endSeparator = endSeparator if (endSeparator is None) and (componentLength == 0) and (translationDictionary is None): raise Exception('Invalid parameters to determine the length of the field') self.componentLength = componentLength self.translationDictionary = translationDictionary self.parentComponent = parentComponent self.formatTimezone = None self.nextComponents = {} def addFormat(self, formatString, formatLocale, formatTimezone): """Add a new format to be parsed.""" if formatString[0] != '%': raise Exception('Format string has to start with "%", strip away all static data outside \ this formatter before starting to parse') if self.formatTimezone is not None: raise Exception('Current node is already an end node, no format adding any more') parsePos = 1 componentType = -1 componentLength = -1 translationDictionary = None if formatString[parsePos] == 'b': # Month name parsePos += 1 componentType = COMPONENT_TYPE_MONTH componentLength = 0 locale.setlocale(locale.LC_ALL, formatLocale) translationDictionary = {} for monthNum in range(1, 13): # As we have switched locale before, this will return the byte # string for the month name encoded using the correct encoding. newValue = datetime.datetime(1970, monthNum, 1).strftime('%b') for oldValue in translationDictionary: if (oldValue.startswith(newValue)) or (newValue.startswith(oldValue)): raise Exception('Strange locale with month names too similar') translationDictionary[newValue] = monthNum if len(translationDictionary) != 12: raise Exception('Internal error: less than 12 month a year') elif formatString[parsePos] == 'd': # Day number parsePos += 1 componentType = COMPONENT_TYPE_DAY componentLength = 2 elif formatString[parsePos] == 'H': # Hour 0..23 parsePos += 1 componentType = COMPONENT_TYPE_HOUR componentLength = 2 elif formatString[parsePos] == 'M': # Minute parsePos += 1 componentType = COMPONENT_TYPE_MINUTE componentLength = 2 elif formatString[parsePos] == 'S': # Second parsePos += 1 componentType = COMPONENT_TYPE_SECOND componentLength = 2 else: raise Exception('Unsupported date format code "%s"' % formatString[parsePos]) endPos = formatString.find('%', parsePos) endSeparator = None if endPos < 0: endSeparator = formatString[parsePos:] parsePos = len(formatString) else: endSeparator = formatString[parsePos:endPos] parsePos = endPos if not endSeparator: endSeparator = None # Make sure all values are sane. # Make sure no parent component is parsing the same type. checkComponent = self while checkComponent is not None: if checkComponent.componentType == componentType: raise Exception('Current format defines component of type %d twice' % componentType) checkComponent = checkComponent.parentComponent lookupKey = None if translationDictionary is None: lookupKey = '%sn%d' % (endSeparator, componentLength) else: lookupKey = '%st%d' % (endSeparator, componentLength) nextComponent = self.nextComponents.get(lookupKey, None) if nextComponent is None: nextComponent = DateFormatComponent(componentType, endSeparator, \ componentLength, translationDictionary, self) self.nextComponents[lookupKey] = nextComponent else: # Merge needed. 
nextComponent.mergeComponentData(componentType, componentLength, \ translationDictionary) if parsePos != len(formatString): nextComponent.addFormat(formatString[parsePos:], formatLocale, \ formatTimezone) else: # Import in constructor to avoid failures reading the class in # module initialization on setups without pytz. import pytz nextComponent.makeEndNode(pytz.timezone(formatTimezone)) def mergeComponentData(self, componentType, componentLength, translationDictionary): """Merge data of given component type, length and lookup information into the current dataset.""" if (self.componentType != componentType) or (self.componentLength != componentLength): raise Exception('Cannot merge data with different type or length') if (self.translationDictionary is not None) != (translationDictionary is not None): raise Exception('Cannot merge digit and translated data') if translationDictionary is None: # Without dictionary, we are done here: length and type are matching. return for key in translationDictionary: for oldKey in self.translationDictionary: if ((key.startswith(oldKey)) or (oldKey.startswith(key))) and (key != oldKey): raise Exception('Translation strings from different locales too similar for \ unambiguous parsing') value = translationDictionary.get(key) currentValue = self.translationDictionary.get(key, None) if currentValue is None: self.translationDictionary[key] = value elif currentValue != value: raise Exception('Conflict in translation dictionary for %s: %s vs %s' % ( key, value, currentValue)) def makeEndNode(self, formatTimezone): """Make this DateFormatComponent an end node. When reached during parsing, calculation of the timestamp value within the given is triggered.""" if (self.formatTimezone is not None) and (self.formatTimezone != formatTimezone): raise Exception('Node is already an end node for different timezone') elif self.nextComponents: raise Exception('Cannot make node with subcomponents an end node') self.formatTimezone = formatTimezone def parse(self, dateString, parsePos): """Parse the supplied dateString starting from the given position. @return a triple containing the field list, the parsing end position and the target timezone for parsed fields.""" componentValue = None # Position after value the value but before an optional separator. endPos = -1 if self.componentType >= 0: if self.endSeparator is not None: if self.componentLength == 0: endPos = dateString.find(self.endSeparator, parsePos) else: endPos = parsePos+self.componentLength if not dateString.find(self.endSeparator, endPos): endPos = -1 if endPos < 0: return None elif self.componentLength != 0: endPos = parsePos+self.componentLength else: return None if endPos != -1: valueStr = dateString[parsePos:endPos] if self.translationDictionary is None: componentValue = int(valueStr.strip()) else: componentValue = self.translationDictionary.get(valueStr) if componentValue is None: return None else: # Without length, we need to got through all the dictionary components # and see if the dateString starts with that key. As keys were # already verified, that no key is starting portion of other key, # that does not need to be checked. checkString = dateString[parsePos:] for key in self.translationDictionary: if checkString.startswith(key): componentValue = self.translationDictionary.get(key) endPos = parsePos+len(key) break if componentValue is None: return None # Now after parsing of value, add the length of the separator # but make sure, it is really present. 
if self.endSeparator is not None: if dateString.find(self.endSeparator, endPos) != endPos: return None endPos += len(self.endSeparator) else: # Negative componentType means, that this node is just a collector # of subcomponents so do not change the parsing position for the # next round. endPos = 0 if self.formatTimezone is not None: # This is the end node, return the results. fields = [None]*COMPONENT_TYPE_LENGTH fields[self.componentType] = componentValue return (fields, endPos, self.formatTimezone) # So this is no end node. Search the list of next components and # continue parsing the next component. for key in self.nextComponents: nextComponent = self.nextComponents.get(key) result = nextComponent.parse(dateString, endPos) if result is not None: if componentValue is not None: result[0][self.componentType] = componentValue return result return None # Todos: # * Add unit-test with # * leap year # * dst hour gain/loose deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/DecimalIntegerValueModelElement.py0000600000000000000000000000543613351151374031565 0ustar rootroot"""This module defines an model element for integer number parsing.""" from aminer.parsing import ModelElementInterface from aminer.parsing.MatchElement import MatchElement class DecimalIntegerValueModelElement(ModelElementInterface): """This class defines a model to parse integer values with optional signum or padding. If both are present, it is signum has to be before the padding characters.""" SIGN_TYPE_NONE = 'none' SIGN_TYPE_OPTIONAL = 'optional' SIGN_TYPE_MANDATORY = 'mandatory' PAD_TYPE_NONE = 'none' PAD_TYPE_ZERO = 'zero' PAD_TYPE_BLANK = 'blank' def __init__( self, pathId, valueSignType=SIGN_TYPE_NONE, valuePadType=PAD_TYPE_NONE): self.pathId = pathId self.startCharacters = None if valueSignType == DecimalIntegerValueModelElement.SIGN_TYPE_NONE: self.startCharacters = b'0123456789' elif valueSignType == DecimalIntegerValueModelElement.SIGN_TYPE_OPTIONAL: self.startCharacters = b'-0123456789' elif valueSignType == DecimalIntegerValueModelElement.SIGN_TYPE_MANDATORY: self.startCharacters = b'+-' else: raise Exception('Invalid valueSignType "%s"' % valueSignType) self.padCharacters = b'' if valuePadType == DecimalIntegerValueModelElement.PAD_TYPE_NONE: pass elif valuePadType == DecimalIntegerValueModelElement.PAD_TYPE_ZERO: self.padCharacters = b'0' elif valuePadType == DecimalIntegerValueModelElement.PAD_TYPE_BLANK: self.padCharacters = b' ' else: raise Exception('Invalid valuePadType "%s"' % valueSignType) self.valuePadType = valuePadType def getChildElements(self): """Get all possible child model elements of this element. @return empty list as there are no children of this element.""" return [] def getMatchElement(self, path, matchContext): """Find the maximum number of bytes forming a integer number according to the parameters specified. 
@return a match when at least one byte being a digit was found""" data = matchContext.matchData allowedCharacters = self.startCharacters if not data or (data[0] not in allowedCharacters): return None matchLen = 1 allowedCharacters = self.padCharacters for testByte in data[matchLen:]: if testByte not in allowedCharacters: break matchLen += 1 numStartPos = matchLen allowedCharacters = b'0123456789' for testByte in data[matchLen:]: if testByte not in allowedCharacters: break matchLen += 1 if matchLen == 1: if data[0] not in b'0123456789': return None elif numStartPos == matchLen: return None matchString = data[:matchLen] matchValue = int(matchString) matchContext.update(matchString) return MatchElement( '%s/%s' % (path, self.pathId), matchString, matchValue, None) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/FirstMatchModelElement.py0000600000000000000000000000176713354622207027764 0ustar rootroot"""This module defines a model element that allows branches. The first matching branch is taken.""" class FirstMatchModelElement: """This class defines a model element to return the match from the the first matching child model within a given list.""" def __init__(self, elementId, children): self.elementId = elementId self.children = children if (children is None) or (None in children): raise Exception('Invalid children list') def getChildElements(self): """Get all possible child model elements of this element.""" return self.children def getMatchElement(self, path, matchContext): """@return None when there is no match, MatchElement otherwise.""" currentPath = "%s/%s" % (path, self.elementId) matchData = matchContext.matchData for childElement in self.children: childMatch = childElement.getMatchElement(currentPath, matchContext) if childMatch != None: return childMatch matchContext.matchData = matchData return None deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/RepeatedElementDataModelElement.py0000600000000000000000000000235113350456656031554 0ustar rootroot"""This module defines a model element that repeats a number of times.""" from aminer.parsing.MatchElement import MatchElement class RepeatedElementDataModelElement: """Objects of this class match on repeats of a given element.""" def __init__(self, elementId, repeatedElement, minRepeat=-1, maxRepeat=-1, repeatRef=None): self.elementId = elementId self.repeatedElement = repeatedElement def getChildElements(self): """Return a list of all children model elements.""" return [self.repeatedElement] def getMatchElement(self, path, matchContext): """Find a suitable number of repeats.""" currentPath = "%s/%s" % (path, self.elementId) minRepeat = 0 maxRepeat = 0x100000 startData = matchContext.matchData matches = [] matchCount = 0 while matchCount != maxRepeat: childMatch = self.repeatedElement.getMatchElement( '%s/%s' % (currentPath, matchCount), matchContext) if childMatch is None: break matches += [childMatch] matchCount += 1 if matchCount < minRepeat: matchContext.matchData = startData return None return MatchElement(currentPath, \ startData[:len(startData)-len(matchContext.matchData)], None, matches) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/MatchContext.py0000600000000000000000000000535413351134056026017 0ustar rootroot"""This module defines the match context.""" from aminer.parsing.MatchElement import MatchElement class MatchContext(object): """This class allows storage of data relevant during the matching process, e.g. the root node and the remaining unmatched data. 
When searching for non-atomic matches, e.g. sequences, the context
  might be modified by model subelements, even if the main model
  element will not return a match. In that case, those non-atomic
  model elements have to take care to restore the context before
  returning."""

  def __init__(self, matchData):
    """Create a MatchContext with the full unmatched string data.
    @param matchData the data that will be tested by the next model
    element."""
    self.matchData = matchData
    self.rootMatchElement = MatchElement('/', None, None, [])

  def update(self, matchString):
    """Update the match context by removing the given matched string
    data from the context data still to be matched. For performance
    reasons this method does not check whether the removed data really
    is the beginning of the remaining match data; that check is only
    performed in the DebugMatchContext class."""
    self.matchData = self.matchData[len(matchString):]


class DebugMatchContext(MatchContext):
  """This class defines a slower MatchContext for debugging purposes."""

  def __init__(self, matchData):
    self.debugInfo = ''
    self.lastMatchData = None
    self.shortestUnmatchedData = None
    super(DebugMatchContext, self).__init__(matchData)

  def update(self, matchString):
    """Update the context and store debugging information."""
    if self.lastMatchData != self.matchData:
      self.lastMatchData = self.matchData
      self.debugInfo += 'Starting match update on %s\n' % repr(self.matchData)
    if not self.matchData.startswith(matchString):
      self.debugInfo += 'Current data %s does not start with %s\n' % (
          repr(self.matchData), repr(matchString))
      raise Exception('Illegal state')
    self.matchData = self.matchData[len(matchString):]
    self.lastMatchData = self.matchData
    if (self.shortestUnmatchedData is None) or (
        len(self.matchData) < len(self.shortestUnmatchedData)):
      self.shortestUnmatchedData = self.matchData
    self.debugInfo += 'Removed %s, remaining %d bytes\n' % (
        repr(matchString), len(self.matchData))

  def getDebugInfo(self):
    """Get the current debugging information and reset it."""
    result = self.debugInfo
    self.debugInfo = ''
    result += 'Shortest unmatched data was %s\n' % repr(self.shortestUnmatchedData)
    return result

  def getshortestUnmatchedData(self):
    """Get the shortest matchData found while updating the internal state.
    This is useful to find out where the parsing process has terminated."""
    return self.shortestUnmatchedData

deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/Base64StringModelElement.py:

"""This module provides base64 string matching."""

import base64

from aminer.parsing import ModelElementInterface
from aminer.parsing.MatchElement import MatchElement


class Base64StringModelElement(ModelElementInterface):
  """This class just tries to strip off as many base64 bytes as
  possible from a given data string."""

  def __init__(self, pathId):
    self.pathId = pathId

  def getChildElements(self):
    return None

  def getMatchElement(self, path, matchContext):
    """Find the maximum number of bytes forming a base64 string.
    @return a match when at least one complete 4-byte base64 block
    was found."""
    data = matchContext.matchData
    matchLen = 0
    atEndFlag = False
    # Iterating over a bytes object yields integer byte values.
    for bVal in data:
      if atEndFlag:
        if ((matchLen&0x3) == 0) or (bVal != 0x3d):
          break
      elif (not ((bVal >= 0x30) and (bVal <= 0x39)) and
            not ((bVal >= 0x41) and (bVal <= 0x5a)) and
            not ((bVal >= 0x61) and (bVal <= 0x7a)) and
            (bVal not in [0x2b, 0x2f])):
        if (bVal != 0x3d) or ((matchLen&0x2) == 0):
          break
        atEndFlag = True
      matchLen += 1
    matchLen = matchLen&(-4)
    if matchLen == 0:
      return None
    matchString = data[:matchLen]
    matchContext.update(matchString)
    return MatchElement(
        "%s/%s" % (path, self.pathId), matchString,
        base64.b64decode(matchString), None)

deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/VariableByteDataModelElement.py:

"""This module defines a model element for a variable amount of bytes."""

from aminer.parsing.MatchElement import MatchElement


class VariableByteDataModelElement:
  """This class defines a model element that takes any string that
  only contains characters of a given alphabet."""

  def __init__(self, elementId, alphabet):
    self.elementId = elementId
    self.alphabet = alphabet

  def getChildElements(self):
    """Get all possible child model elements of this element.
    @return None as there are no children of this element."""
    return None

  def getMatchElement(self, path, matchContext):
    """Find the maximum number of bytes matching the given alphabet.
    @return a match when at least one byte was found within alphabet."""
    data = matchContext.matchData
    matchLen = 0
    for testByte in data:
      if testByte not in self.alphabet:
        break
      matchLen += 1
    if matchLen == 0:
      return None
    matchData = data[:matchLen]
    matchContext.update(matchData)
    return MatchElement("%s/%s" % (path, self.elementId), matchData, matchData, None)

deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/DebugModelElement.py:

"""This module defines a debug model element that can be used to check
whether a specific position in the parsing tree is reached by log atoms."""

import sys

from aminer.parsing.MatchElement import MatchElement


class DebugModelElement:
  """This class defines a model element matching any data of length
  zero at any position. Thus it can never fail to match and can be
  inserted at any position in the parsing tree, where matching itself
  does not alter parsing flow (see e.g. FirstMatchModelElement).
It will immediately write the current state of the match to stderr for inspection.""" def __init__(self, elementId): self.elementId = elementId # To avoid having those elements hidden in production configuration, # write a line every time the class is instantiated. print('DebugModelElement %s added' % elementId, file=sys.stderr) def getChildElements(self): """Get all possible child model elements of this element. @return empty list as there are no children of this element.""" return None def getMatchElement(self, path, matchContext): """@return Always return a match.""" print('DebugModelElement path = "%s/%s", unmatched = "%s"' % \ (path, self.elementId, repr(matchContext.matchData)), file=sys.stderr) return MatchElement('%s/%s' % (path, self.elementId), \ '', '', None) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/ParserMatch.py0000600000000000000000000000302013346651135025621 0ustar rootroot"""This module defines a matching parser model element.""" from collections import deque class ParserMatch: """Objects of this class store information about a complete model match. Unlike the MatchElement, this class also provides fields to store information commonly used when dealing with the match.""" def __init__(self, matchElement, parsingProcessData=None): """Initialize the match. @param matchElement the root MatchElement from the parsing process. @param parsingProcessData this parameter might provide more information about the parsing process, e.g. when parsing produced warnings. The data is specific for the source producing the match.""" self.matchElement = matchElement self.parsingProcessData = parsingProcessData self.matchDictionary = None def getMatchElement(self): """Return the matching element.""" return self.matchElement def getMatchDictionary(self): """Return a dictionary of all children matches.""" if self.matchDictionary is not None: return self.matchDictionary stack = deque() stack.append([self.matchElement]) resultDict = {} while stack: matchList = stack.pop() for testMatch in matchList: resultDict[testMatch.path] = testMatch children = testMatch.children if (children is not None) and children: stack.append(children) self.matchDictionary = resultDict return resultDict def __str__(self): return 'ParserMatch: %s' % (self.matchElement.annotateMatch(' ')) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/AnyByteDataModelElement.py0000600000000000000000000000150713354622646030064 0ustar rootroot"""This module defines a model element that matches any byte.""" from aminer.parsing.MatchElement import MatchElement class AnyByteDataModelElement: """This class matches any byte but at least one. Thus a match will always span the complete data from beginning to end.""" def __init__(self, elementId): self.elementId = elementId def getChildElements(self): """Get all possible child model elements of this element. 
@return None as there are no children of this element.""" return None def getMatchElement(self, path, matchContext): """Just return a match including all data from the context""" matchData = matchContext.matchData if not matchData: return None matchContext.update(matchData) return MatchElement("%s/%s" % (path, self.elementId), \ matchData, matchData, None) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/WhiteSpaceLimitedDataModelElement.py0000600000000000000000000000206413351151470032041 0ustar rootroot"""This module defines a model element that takes any string up to the next white space.""" from aminer.parsing.MatchElement import MatchElement class WhiteSpaceLimitedDataModelElement: """This class defines a model element that represents a variable amount of characters delimited by a white space.""" def __init__(self, elementId): self.elementId = elementId def getChildElements(self): """Get all possible child model elements of this element. @return None as there are no children of this element.""" return None def getMatchElement(self, path, matchContext): """Find the maximum number of bytes before encountering whitespace or end of data. @return a match when at least one byte was found.""" data = matchContext.matchData matchLen = 0 for testByte in data: if testByte in b' \t': break matchLen += 1 if matchLen == 0: return None matchData = data[:matchLen] matchContext.update(matchData) return MatchElement("%s/%s" % (path, self.elementId), matchData, matchData, None) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/FixedDataModelElement.py0000600000000000000000000000202613352716762027546 0ustar rootroot"""This module defines a model element representing a fixed string.""" from aminer.parsing.MatchElement import MatchElement class FixedDataModelElement: """This class defines a model element of a fixed string. The model element is considered a match if the fixed string is found at this position in the log atom.""" def __init__(self, elementId, fixedData): if not isinstance(fixedData, bytes): raise Exception('fixedData has to be byte string') self.elementId = elementId self.fixedData = fixedData def getChildElements(self): """Get all possible child model elements of this element. @return None as there are no children of this element.""" return None def getMatchElement(self, path, matchContext): """@return None when there is no match, MatchElement otherwise.""" if not matchContext.matchData.startswith(self.fixedData): return None matchContext.update(self.fixedData) return MatchElement("%s/%s" % (path, self.elementId), \ self.fixedData, self.fixedData, None) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/SequenceModelElement.py0000600000000000000000000000264413350456675027475 0ustar rootroot"""This module defines a model element that consists of a sequence of model elements that all have to match.""" from aminer.parsing.MatchElement import MatchElement class SequenceModelElement: """This class defines an element to find matches that comprise matches of all given child model elements.""" def __init__(self, elementId, children): self.elementId = elementId self.children = children def getChildElements(self): """Return all model elements of the sequence.""" return self.children def getMatchElement(self, path, matchContext): """Try to find a match on given data for this model element and all its children. When a match is found, the matchContext is updated accordingly. @param path the model path to the parent model element invoking this method. 
@param matchContext an instance of MatchContext class holding the data context to match against. @return the matchElement or None if model did not match.""" currentPath = "%s/%s" % (path, self.elementId) startData = matchContext.matchData matches = [] for childElement in self.children: childMatch = childElement.getMatchElement(currentPath, matchContext) if childMatch is None: matchContext.matchData = startData return None matches += [childMatch] return MatchElement(currentPath, \ startData[:len(startData)-len(matchContext.matchData)], None, matches) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/DecimalFloatValueModelElement.py0000600000000000000000000001015013351151755031225 0ustar rootroot"""This module defines an model element for decimal number parsing as float.""" from aminer.parsing import ModelElementInterface from aminer.parsing.MatchElement import MatchElement class DecimalFloatValueModelElement(ModelElementInterface): """This class defines a model to parse decimal values with optional signum, padding or exponent. With padding, the signum has to be found before the padding characters.""" SIGN_TYPE_NONE = 'none' SIGN_TYPE_OPTIONAL = 'optional' SIGN_TYPE_MANDATORY = 'mandatory' PAD_TYPE_NONE = 'none' PAD_TYPE_ZERO = 'zero' PAD_TYPE_BLANK = 'blank' EXP_TYPE_NONE = 'none' EXP_TYPE_OPTIONAL = 'optional' EXP_TYPE_MANDATORY = 'mandatory' def __init__( self, pathId, valueSignType=SIGN_TYPE_NONE, valuePadType=PAD_TYPE_NONE, exponentType=EXP_TYPE_NONE): self.pathId = pathId self.startCharacters = None if valueSignType == DecimalFloatValueModelElement.SIGN_TYPE_NONE: self.startCharacters = b'0123456789' elif valueSignType == DecimalFloatValueModelElement.SIGN_TYPE_OPTIONAL: self.startCharacters = b'-0123456789' elif valueSignType == DecimalFloatValueModelElement.SIGN_TYPE_MANDATORY: self.startCharacters = b'+-' else: raise Exception('Invalid valueSignType "%s"' % valueSignType) self.padCharacters = b'' if valuePadType == DecimalFloatValueModelElement.PAD_TYPE_NONE: pass elif valuePadType == DecimalFloatValueModelElement.PAD_TYPE_ZERO: self.padCharacters = b'0' elif valuePadType == DecimalFloatValueModelElement.PAD_TYPE_BLANK: self.padCharacters = b' ' else: raise Exception('Invalid valuePadType "%s"' % valueSignType) self.valuePadType = valuePadType if exponentType not in [ DecimalFloatValueModelElement.EXP_TYPE_NONE, DecimalFloatValueModelElement.EXP_TYPE_OPTIONAL, DecimalFloatValueModelElement.EXP_TYPE_MANDATORY]: raise Exception('Invalid exponentType "%s"' % exponentType) self.exponentType = exponentType def getChildElements(self): """Get all possible child model elements of this element. @return empty list as there are no children of this element.""" return None def getMatchElement(self, path, matchContext): """Find the maximum number of bytes forming a decimal number according to the parameters specified. @return a match when at least one byte being a digit was found""" data = matchContext.matchData allowedCharacters = self.startCharacters if not data or (data[0] not in allowedCharacters): return None matchLen = 1 allowedCharacters = self.padCharacters for testByte in data[matchLen:]: if testByte not in allowedCharacters: break matchLen += 1 numStartPos = matchLen allowedCharacters = b'0123456789' for testByte in data[matchLen:]: if testByte not in allowedCharacters: break matchLen += 1 if matchLen == 1: if data[0] not in b'0123456789': return None elif numStartPos == matchLen: return None # See if there is decimal part after decimal point. 
if (matchLen < len(data)) and (data[matchLen] == '.'): matchLen += 1 postPointStart = matchLen for testByte in data[matchLen:]: if testByte not in b'0123456789': break matchLen += 1 if matchLen == postPointStart: # There has to be at least one digit after the decimal point. return None # See if there could be any exponent following the number. if ((self.exponentType != DecimalFloatValueModelElement.EXP_TYPE_NONE) and (matchLen+1 < len(data)) and (data[matchLen] in b'eE')): matchLen += 1 if data[matchLen] in b'+-': matchLen += 1 expNumberStart = matchLen for testByte in data[matchLen:]: if testByte not in b'0123456789': break matchLen += 1 if matchLen == expNumberStart: # No exponent number found. return None matchString = data[:matchLen] matchValue = float(matchString) matchContext.update(matchString) return MatchElement( '%s/%s' % (path, self.pathId), matchString, matchValue, None) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/HexStringModelElement.py0000600000000000000000000000240113352412453027613 0ustar rootroot"""This module defines a model element that represents a hex string of arbitrary length.""" from aminer.parsing.MatchElement import MatchElement class HexStringModelElement: """This class just tries to strip off as many hex bytes as possible from a given data string.""" def __init__(self, elementId, upperCase=False): self.elementId = elementId if upperCase: self.charStart = ord('A') else: self.charStart = ord('a') def getChildElements(self): """Get all possible child model elements of this element. @return None as there are no children of this element.""" return None def getMatchElement(self, path, matchContext): """Find the maximum number of bytes forming a integer number according to the parameters specified @return a match when at least one byte being a digit was found""" data = matchContext.matchData matchLen = 0 for bVal in data: if ((bVal < 0x30) or (bVal > 0x39)) and ((bVal < self.charStart) or ( bVal-self.charStart > 5)): break matchLen += 1 if matchLen == 0: return None matchString = data[:matchLen] matchContext.update(matchString) return MatchElement("%s/%s" % (path, self.elementId), \ matchString, matchString, None) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/OptionalMatchModelElement.py0000600000000000000000000000200713354623077030454 0ustar rootroot"""This module defines a model element that is optional.""" from aminer.parsing.MatchElement import MatchElement class OptionalMatchModelElement: """This class defines a model element tries to match against a given model element and if that fails returns a zero length match anyway.""" def __init__(self, elementId, optionalElement): self.elementId = elementId self.optionalElement = optionalElement def getChildElements(self): """Return all optional elements.""" return [self.optionalElement] def getMatchElement(self, path, matchContext): """@return the embedded child match or an empty match.""" currentPath = "%s/%s" % (path, self.elementId) startData = matchContext.matchData match = self.optionalElement.getMatchElement(currentPath, matchContext) if match is None: return MatchElement("%s/%s" % (path, self.elementId), \ '', None, None) return MatchElement(currentPath, \ startData[:len(startData)-len(matchContext.matchData)], None, [match]) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/DelimitedDataModelElement.py0000600000000000000000000000204213350456001030366 0ustar rootroot"""This module defines a model element that takes any string up to a specific delimiter string.""" from 
aminer.parsing.MatchElement import MatchElement class DelimitedDataModelElement: """Find a string delimited by given delimiter string, possibly a match of zero byte length""" def __init__(self, elementId, delimiter): self.elementId = elementId self.delimiter = delimiter def getChildElements(self): """Get all possible child model elements of this element. @return None as there are no children of this element.""" return None def getMatchElement(self, path, matchContext): """Find the maximum number of bytes before encountering the delimiter. @return a match when at least one byte was found but not the delimiter itself.""" data = matchContext.matchData matchLen = data.find(self.delimiter) if matchLen < 0: return None matchData = data[:matchLen] matchContext.update(matchData) return MatchElement("%s/%s" % (path, self.elementId), \ matchData, matchData, None) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/FixedWordlistDataModelElement.py0000600000000000000000000000353413350457331031273 0ustar rootroot"""This module defines a model element to detect fixed strings from a list of words.""" from aminer.parsing import ModelElementInterface from aminer.parsing.MatchElement import MatchElement class FixedWordlistDataModelElement(ModelElementInterface): """This class defines a model element to detect fixed strings from a list of words. The match will return the position of the word in the search list, thus the sorting of the list is important. Apart from that, the wordlist must not contain any words, that are identical to the beginning of words later in the list. In that case, the longer match could never be detected.""" def __init__(self, pathId, wordlist): """Create the model element. @param wordlist the list of words to search for. If it does not fulfill the sorting criteria mentioned in the class documentation, an Exception will be raised.""" self.pathId = pathId self.wordlist = wordlist for testPos, refWord in enumerate(wordlist): for testWord in wordlist[testPos+1:]: if testWord.startswith(refWord): raise Exception( 'Word %s would be shadowed by word %s at lower position' % ( repr(testWord), repr(refWord))) def getChildElements(self): """Get all possible child model elements of this element. @return None as there are no children of this element.""" return None def getMatchElement(self, path, matchContext): """@return None when there is no match, MatchElement otherwise.""" data = matchContext.matchData matchData = None wordPos = 0 for word in self.wordlist: if data.startswith(word): matchData = word break wordPos += 1 if matchData is None: return None matchContext.update(matchData) return MatchElement( "%s/%s" % (path, self.pathId), matchData, wordPos, None) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/DateTimeModelElement.py0000600000000000000000000002704413352676105027414 0ustar rootroot"""This module contains a datetime parser and helper classes for parsing.""" import datetime import sys import time from aminer.parsing import ModelElementInterface from aminer.parsing.MatchElement import MatchElement class DateTimeModelElement(ModelElementInterface): """This class defines a model element to parse date or datetime values. The element is similar to the strptime function but does not use it due to the numerous problems associated with it, e.g. 
no leap year support for semi-qualified years, no %s (seconds since epoch) format in Python strptime, no %f support in libc strptime, no support to determine the length of the parsed string.""" def __init__( self, pathId, dateFormat, timeZone=None, textLocale=None, startYear=None, maxTimeJumpSeconds=86400): """Create a DateTimeModelElement to parse dates using a custom, timezone and locale-aware implementation similar to strptime. @param dateFormat is a byte string that represents the date format for parsing, see Python strptime specification for available formats. Supported format specifiers are: * %b: month name in current locale * %d: day in month, can be space or zero padded when followed by separator or at end of string. * %f: fraction of seconds (the digits after the '.') * %H: hours from 00 to 23 * %M: minutes * %m: two digit month number * %S: seconds * %s: seconds since the epoch (1970-01-01) * %Y: 4 digit year number Common formats are: * '%b %d %H:%M:%S' e.g. for 'Nov 19 05:08:43' @param timeZone the timezone for parsing the values or UTC when None. @param textLocale the locale to use for parsing the day and month names or None to use the default locale. Locale changing is not yet implemented, use locale.setlocale() in global configuration. @param startYear when parsing date records without any year information, assume this is the year of the first value parsed. @param maxTimeJumpSeconds for detection of year wraps with date formats missing year information, the current time of values also has to be tracked. This value defines the window within which the time may jump between two matches. When not within that window, the value is still parsed and corrected to the most likely value but does not change the detection year.""" self.pathId = pathId self.timeZone = timeZone # Make sure that dateFormat is valid and extract the relevant # parts from it.
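# Illustrative note: scanDateFormat() below splits a format such as
# b'%b %d %H:%M:%S' into literal separator bytes and
# (componentIndex, parseLength, transform) tuples, one per format component.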
self.formatHasYearFlag = False self.dateFormatParts = None self.scanDateFormat(dateFormat) self.startYear = startYear if (not self.formatHasYearFlag) and (startYear is None): self.startYear = time.gmtime(None).tm_year self.maxTimeJumpSeconds = maxTimeJumpSeconds self.lastParsedSeconds = 0 self.epochStartTime = datetime.datetime.fromtimestamp(0, self.timeZone) def scanDateFormat(self, dateFormat): """Scan the date format.""" if self.dateFormatParts != None: raise Exception('Cannot rescan date format after initialization') dateFormatParts = [] dateFormatTypeSet = set() scanPos = 0 while scanPos < len(dateFormat): nextParamPos = dateFormat.find(b'%', scanPos) if nextParamPos < 0: nextParamPos = len(dateFormat) newElement = None if nextParamPos != scanPos: newElement = dateFormat[scanPos:nextParamPos] else: paramTypeCode = dateFormat[nextParamPos+1:nextParamPos+2] nextParamPos = scanPos+2 if paramTypeCode == b'%': newElement = b'%' elif paramTypeCode == b'b': import calendar nameDict = {} for monthPos in range(1, 13): nameDict[calendar.month_name[monthPos][:3].encode()] = monthPos newElement = (1, 3, nameDict) elif paramTypeCode == b'd': newElement = (2, 2, int) elif paramTypeCode == b'f': newElement = (6, -1, DateTimeModelElement.parseFraction) elif paramTypeCode == b'H': newElement = (3, 2, int) elif paramTypeCode == b'M': newElement = (4, 2, int) elif paramTypeCode == b'm': newElement = (1, 2, int) elif paramTypeCode == b'S': newElement = (5, 2, int) elif paramTypeCode == b's': newElement = (7, -1, int) elif paramTypeCode == b'Y': newElement = (0, 4, int) else: raise Exception('Unknown dateformat specifier %s' % repr(paramTypeCode)) if isinstance(newElement, bytes): if dateFormatParts and (isinstance(dateFormatParts[-1], bytes)): dateFormatParts[-1] += newElement else: dateFormatParts.append(newElement) else: if newElement[0] in dateFormatTypeSet: raise Exception('Multiple format specifiers for type %d' % newElement[0]) dateFormatTypeSet.add(newElement[0]) dateFormatParts.append(newElement) scanPos = nextParamPos if (7 in dateFormatTypeSet) and (not dateFormatTypeSet.isdisjoint(set(range(0, 6)))): raise Exception('Cannot use %%s (seconds since epoch) with other non-second format types') self.dateFormatParts = dateFormatParts def getChildElements(self): """Get all possible child model elements of this element. @return None as no children are allowed.""" return None def getMatchElement(self, path, matchContext): """Try to find a match on given data for this model element and all its children. When a match is found, the matchContext is updated accordingly. @return None when there is no match, MatchElement otherwise. The matchObject returned is a tuple containing the datetime object and the seconds since 1970""" parsePos = 0 # Year, month, day, hour, minute, second, fraction, gmt-seconds: result = [None, None, None, None, None, None, None, None] for partPos in range(0, len(self.dateFormatParts)): dateFormatPart = self.dateFormatParts[partPos] if isinstance(dateFormatPart, bytes): if not matchContext.matchData[parsePos:].startswith(dateFormatPart): return None parsePos += len(dateFormatPart) continue nextLength = dateFormatPart[1] nextData = None if nextLength < 0: # No length given: this is only valid for integer fields or fields # followed by a separator string. 
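# Illustrative example: for a format like b'%s.%f' the %s component has no
# fixed length, so the end of the seconds value is found by searching for
# the next literal separator (here '.') or, failing that, by taking all
# following decimal digits.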
if (partPos+1) < len(self.dateFormatParts): nextPart = self.dateFormatParts[partPos+1] if isinstance(nextPart, bytes): endPos = matchContext.matchData.find(nextPart, parsePos) if endPos < 0: return None nextLength = endPos-parsePos if nextLength < 0: # No separator, so get the number of decimal digits. nextLength = 0 for digitChar in matchContext.matchData[parsePos:]: digitOrd = ord(digitChar) if (digitOrd < 0x30) or (digitOrd > 0x39): break nextLength += 1 if nextLength == 0: return None nextData = matchContext.matchData[parsePos:parsePos+nextLength] else: nextData = matchContext.matchData[parsePos:parsePos+nextLength] if len(nextData) != nextLength: return None parsePos += nextLength transformFunction = dateFormatPart[2] if isinstance(transformFunction, dict): value = None try: value = transformFunction.get(nextData, None) except ValueError: pass if value is None: return None result[dateFormatPart[0]] = value else: try: result[dateFormatPart[0]] = transformFunction(nextData) except: # Parsing failed, most likely due to wrong format. return None dateStr = matchContext.matchData[:parsePos] # Now combine the values and build the final value. parsedDateTime = None totalSeconds = result[7] if totalSeconds != None: if result[6] != None: totalSeconds += result[6] # For epoch second formats, the datetime value usually is not # important. So stay with parsedDateTime to none. else: if not self.formatHasYearFlag: result[0] = self.startYear microseconds = 0 if result[6] != None: microseconds = int(result[6]*1000000) try: parsedDateTime = datetime.datetime( result[0], result[1], result[2], result[3], result[4], result[5], microseconds, self.timeZone) except: # The values did not form a valid datetime object, e.g. when the # day of month is out of range. The rare case where dates without # year are parsed and the last parsed timestamp was from the previous # non-leap year but the current timestamp is it, is ignored. Values # that sparse and without a year number are very likely to result # in invalid data anyway. return None # Avoid timedelta.total_seconds(), not supported in Python 2.6. delta = parsedDateTime-self.epochStartTime totalSeconds = (delta.days*86400+delta.seconds) # See if this is change from one year to next. if not self.formatHasYearFlag: if self.lastParsedSeconds == 0: # There cannot be a wraparound if we do not know any previous # time values yet. self.lastParsedSeconds = totalSeconds else: delta = self.lastParsedSeconds-totalSeconds if abs(delta) <= self.maxTimeJumpSeconds: self.lastParsedSeconds = totalSeconds else: # This might be the first date value for the next year or one # from the previous. Test both cases and see, what is more likely. 
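# Illustrative example: with detection year 2018, a jump from
# 'Dec 31 23:59:59' to 'Jan 01 00:00:10' is closest to the next-year
# interpretation, so startYear is incremented below; a value close to the
# previous year is reparsed with that year while startYear stays unchanged.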
nextYearDateTime = parsedDateTime.replace(self.startYear+1) delta = nextYearDateTime-self.epochStartTime nextYearTotalSeconds = (delta.days*86400+delta.seconds) if nextYearTotalSeconds-self.lastParsedSeconds <= self.maxTimeJumpSeconds: self.startYear += 1 parsedDateTime = nextYearDateTime totalSeconds = nextYearTotalSeconds self.lastParsedSeconds = totalSeconds print('WARNING: DateTimeModelElement unqualified ' \ 'timestamp year wraparound detected from %s to %s' % ( datetime.datetime.fromtimestamp( self.lastParsedSeconds, self.timeZone).isoformat(), parsedDateTime.isoformat()), file=sys.stderr) else: lastYearDateTime = parsedDateTime.replace(self.startYear-1) delta = lastYearDateTime-self.epochStartTime lastYearTotalSeconds = (delta.days*86400+delta.seconds) if self.lastParsedSeconds-lastYearTotalSeconds <= self.maxTimeJumpSeconds: parsedDateTime = lastYearDateTime totalSeconds = lastYearTotalSeconds self.lastParsedSeconds = totalSeconds else: # None of both seems correct, just report that. print('WARNING: DateTimeModelElement ' \ 'time inconsistencies parsing %s, expecting value ' \ 'around %d. Check your settings!' % ( repr(dateStr), self.lastParsedSeconds), file=sys.stderr) # We discarded the parsedDateTime microseconds beforehand, use # the full float value here instead of the rounded integer. if result[6] != None: totalSeconds += result[6] matchContext.update(dateStr) return MatchElement( "%s/%s" % (path, self.pathId), dateStr, (parsedDateTime, totalSeconds,), None) @staticmethod def parseFraction(valueStr): """This method is just required to pass it as function pointer to the parsing logic.""" return float('0.'+valueStr) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/parsing/ElementValueBranchModelElement.py0000600000000000000000000000732513352701034031412 0ustar rootroot"""This module defines a model element that allows branches depending on the value of the previous model value.""" from aminer.parsing import ModelElementInterface from aminer.parsing.MatchElement import MatchElement class ElementValueBranchModelElement(ModelElementInterface): """This class defines an element that selects a branch path based on a previous model value.""" def __init__(self, elementId, valueModel, valuePath, branchModelDict, defaultBranch=None): """Create the branch model element. @param valuePath the relative path to the target value from the valueModel element on. When the path does not resolve to a value, this model element will not match. A path value of None indicates, that the match element of the valueModel should be used directly. @param branchModelDict a dictionary to select a branch for a the value identified by valuePath. @param defaultBranch when lookup in branchModelDict fails, use this as default branch or fail when None.""" self.elementId = elementId self.valueModel = valueModel self.valuePath = valuePath self.branchModelDict = branchModelDict self.defaultBranch = defaultBranch def getId(self): """Get the element ID.""" return self.elementId def getChildElements(self): """Get all possible child model elements of this element. If this element implements a branching model element, then not all child element IDs will be found in mathces produced by getMatchElement. @return a list with all children""" allChildren = [self.valueModel]+self.branchModelDict.values() if self.defaultBranch is not None: allChildren.append(self.defaultBranch) return allChildren def getMatchElement(self, path, matchContext): """Try to find a match on given data for the test model and the selected branch. 
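For example (illustrative, model names are hypothetical): with a valueModel matching an HTTP method word, valuePath None and branchModelDict {'GET': getRequestModel, 'POST': postRequestModel}, the decoded match value selects which branch model parses the remaining data.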
@param path the model path to the parent model element invoking this method. @param matchContext an instance of MatchContext class holding the data context to match against. @return the matchElement or None if the test model did not match, no branch was selected or the branch did not match.""" currentPath = "%s/%s" % (path, self.elementId) startData = matchContext.matchData modelMatch = self.valueModel.getMatchElement(currentPath, matchContext) if modelMatch is None: return None # Now extract the test path value from the modelMatch. From here # on, the matchContext is already modified so we must NEVER just # return but revert the changes in the context first. remainingValuePath = self.valuePath testMatch = modelMatch currentTestPath = testMatch.getPath() while remainingValuePath is not None: nextPartPos = remainingValuePath.find('/') if nextPartPos <= 0: currentTestPath += '/'+remainingValuePath remainingValuePath = None else: currentTestPath += '/'+remainingValuePath[:nextPartPos] remainingValuePath = remainingValuePath[nextPartPos+1:] matchChildren = testMatch.getChildren() testMatch = None if matchChildren is None: break for child in matchChildren: if child.getPath() == currentTestPath: testMatch = child break branchMatch = None if testMatch is not None: branchModel = self.branchModelDict.get(testMatch.getMatchObject().decode(), \ self.defaultBranch) if branchModel is not None: branchMatch = branchModel.getMatchElement(currentPath, matchContext) if branchMatch is None: matchContext.matchData = startData return None return MatchElement(currentPath, \ startData[:len(startData)-len(matchContext.matchData)], \ None, [modelMatch, branchMatch]) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/AMinerConfig.py0000600000000000000000000000244413354643774024270 0ustar rootroot"""This module collects static configuration item keys and configuration loading and handling functions.""" import os import sys import importlib from importlib import util KEY_LOG_SOURCES_LIST = 'LogResourceList' KEY_AMINER_USER = 'AMinerUser' KEY_AMINER_GROUP = 'AMinerGroup' KEY_ANALYSIS_CONFIG_FILE = 'AnalysisConfigFile' KEY_PERSISTENCE_DIR = 'Core.PersistenceDir' DEFAULT_PERSISTENCE_DIR = '/var/lib/aminer' KEY_REMOTE_CONTROL_SOCKET_PATH = 'RemoteControlSocket' def loadConfig(configFileName): """Load the configuration file using the import module.""" aminerConfig = None try: spec = importlib.util.spec_from_file_location('aminerConfig', configFileName) aminerConfig = importlib.util.module_from_spec(spec) spec.loader.exec_module(aminerConfig) except: print('Failed to load configuration from %s' % configFileName, file=sys.stderr) exceptionInfo = sys.exc_info() raise Exception(exceptionInfo[0], exceptionInfo[1], exceptionInfo[2]) return aminerConfig def buildPersistenceFileName(aminerConfig, *args): """Build the full persistency file name from persistency directory configuration and path parts.""" persistenceDirName = aminerConfig.configProperties.get( KEY_PERSISTENCE_DIR, DEFAULT_PERSISTENCE_DIR) return os.path.join(persistenceDirName, *args) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/input/0000755000000000000000000000000013354644133022546 5ustar rootrootdeb-build/root/usr/lib/logdata-anomaly-miner/aminer/input/SimpleByteStreamLineAtomizerFactory.py0000600000000000000000000000250313350426347032215 0ustar rootroot"""This module defines a factory for instanciating line atomizers.""" from aminer.input import AtomizerFactory from aminer.input.ByteStreamLineAtomizer import ByteStreamLineAtomizer class 
SimpleByteStreamLineAtomizerFactory(AtomizerFactory): """This factory just creates the same atomizer for each new resource. All parsed and unparsed atoms are delivered via two lists of handlers.""" def __init__(self, parsingModel, atomHandlerList, eventHandlerList, defaultTimestampPath=None): """Create the factory to forward data and events to the given lists for each newly created atomizer. @param defaultTimestampPath if not None, the value of this timestamp field is extracted from parsed atoms and stored as default timestamp for that atom.""" self.parsingModel = parsingModel self.atomHandlerList = atomHandlerList self.eventHandlerList = eventHandlerList self.defaultTimestampPath = defaultTimestampPath def getAtomizerForResource(self, resourceName): """Get an atomizer for a given resource. @param resourceName the resource name for atomizer selection is ignored in this type of factory. @return a StreamAtomizer object""" return ByteStreamLineAtomizer(self.parsingModel, self.atomHandlerList, \ self.eventHandlerList, 1 << 16, self.defaultTimestampPath) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/input/ByteStreamLineAtomizer.py0000600000000000000000000001332113354644133027512 0ustar rootroot"""This module provides support for splitting a data stream into atoms, perform parsing and forward the results.""" from aminer.input import LogAtom from aminer.input import StreamAtomizer from aminer.parsing import MatchContext from aminer.parsing import ParserMatch class ByteStreamLineAtomizer(StreamAtomizer): """This atomizer consumes binary data from a stream to break it into lines, removing the line separator at the end. With a parsing model, it will also perform line parsing. Failures in atomizing or parsing will cause events to be generated and sent to event handler. Data will be consumed only when there was no downstream handler registered (the data will be discarded in that case) or when at least one downstream consumed the data.""" def __init__( self, parsingModel, atomHandlerList, eventHandlerList, maxLineLength, defaultTimestampPath): """Create the atomizer. @param eventHandlerList when not None, send events to those handlers. The list might be empty at invocation and populated later on. @param maxLineLength the maximal line length including the final line separator.""" self.parsingModel = parsingModel self.atomHandlerList = atomHandlerList self.eventHandlerList = eventHandlerList self.maxLineLength = maxLineLength self.defaultTimestampPath = defaultTimestampPath self.inOverlongLineFlag = False # If consuming of data was already attempted but the downstream # handlers refused to handle it, keep the data and the parsed # object to avoid expensive duplicate parsing operation. The data # does not include the line separators any more. self.lastUnconsumedLogAtom = None def consumeData(self, streamData, endOfStreamFlag=False): """Consume data from the underlying stream for atomizing. @return the number of consumed bytes, 0 if the atomizer would need more data for a complete atom or -1 when no data was consumed at the moment but data might be consumed later on.""" # Loop until as much streamData as possible was processed and # then return a result. The correct processing of endOfStreamFlag # is tricky: by default, even when all data was processed, do # one more iteration to handle also the flag. consumedLength = 0 while True: if self.lastUnconsumedLogAtom != None: # Keep length before dispatching: dispatch will reset the field. 
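# Illustrative note: the +1 added below accounts for the line separator
# byte that was stripped from rawData when the atom was created.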
dataLength = len(self.lastUnconsumedLogAtom.rawData) if self.dispatchAtom(self.lastUnconsumedLogAtom): consumedLength += dataLength+1 continue # Nothing consumed, tell upstream to wait if appropriate. if consumedLength == 0: consumedLength = -1 break lineEnd = streamData.find(b'\n', consumedLength) if self.inOverlongLineFlag: if lineEnd < 0: consumedLength = len(streamData) if endOfStreamFlag: self.dispatchEvent('Overlong line terminated by end of stream', streamData) self.inOverlongLineFlag = False break consumedLength = lineEnd+1 self.inOverlongLineFlag = False continue # This is the valid start of a normal/incomplete/overlong line. if lineEnd < 0: tailLength = len(streamData)-consumedLength if tailLength > self.maxLineLength: self.dispatchEvent( 'Start of overlong line detected', streamData[consumedLength:]) self.inOverlongLineFlag = True consumedLength = len(streamData) # Stay in loop to handle also endOfStreamFlag! continue if endOfStreamFlag and (tailLength != 0): self.dispatchEvent('Incomplete last line', streamData[consumedLength:]) consumedLength = len(streamData) break # This is at least a complete/overlong line. lineLength = lineEnd+1-consumedLength if lineLength > self.maxLineLength: self.dispatchEvent('Overlong line detected', streamData[consumedLength:lineEnd]) consumedLength = lineEnd+1 continue # This is a normal line. lineData = streamData[consumedLength:lineEnd] logAtom = LogAtom.LogAtom(lineData, None, None, self) if self.parsingModel != None: matchContext = MatchContext(lineData) matchElement = self.parsingModel.getMatchElement('', matchContext) if (matchElement != None) and not matchContext.matchData: logAtom.parserMatch = ParserMatch(matchElement) if self.defaultTimestampPath != None: tsMatch = logAtom.parserMatch.getMatchDictionary().get(self.defaultTimestampPath, None) if tsMatch != None: logAtom.setTimestamp(tsMatch.matchObject[1]) if self.dispatchAtom(logAtom): consumedLength = lineEnd+1 continue if consumedLength == 0: # Downstream did not want the data, so tell upstream to block # for a while. consumedLength = -1 break return consumedLength def dispatchAtom(self, logAtom): """Dispatch the data using the appropriate handlers. Also clean or set lastUnconsumed fields depending on outcome of dispatching.""" wasConsumedFlag = False if not self.atomHandlerList: wasConsumedFlag = True else: for handler in self.atomHandlerList: if handler.receiveAtom(logAtom): wasConsumedFlag = True if wasConsumedFlag: self.lastUnconsumedLogAtom = None else: self.lastUnconsumedLogAtom = logAtom return wasConsumedFlag def dispatchEvent(self, message, lineData): """Dispatch an event with given message and line data to all event handlers.""" if self.eventHandlerList is None: return for handler in self.eventHandlerList: handler.receiveEvent( 'Input.%s' % self.__class__.__name__, message, [lineData], None, self) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/input/__init__.py0000600000000000000000000000651613352705615024660 0ustar rootroot"""This file contains interface definition useful implemented by classes in this directory and for use from code outside this directory. All classes are defined in separate files, only the namespace references are added here to simplify the code.""" class AtomizerFactory(object): """This is the common interface of all factories to create atomizers for new data sources and integrate them into the downstream processing pipeline.""" def getAtomizerForResource(self, resourceName): """Get an atomizer for a given resource. 
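A typical implementation (illustrative) returns a ByteStreamLineAtomizer configured with the factory's parsing model and handler lists, as SimpleByteStreamLineAtomizerFactory above does.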
@return a StreamAtomizer object""" raise Exception('Interface method called') class StreamAtomizer(object): """This is the common interface of all binary stream atomizers. Atomizers in general should be good detecting and reporting malformed atoms but continue to function by attempting error correction or resynchronization with the stream after the bad atom. This type of atomizer also signals a stream source when the stream data cannot be handled at the moment to throttle reading of the underlying stream.""" def consumeData(self, streamData, endOfStreamFlag=False): """Consume data from the underlying stream for atomizing. Data should only be consumed after splitting of an atom. The caller has to keep unconsumed data till the next invocation. @param streamData the data offered to be consumed or zero length data when endOfStreamFlag is True (see below). @param endOfStreamFlag this flag is used to indicate, that the streamData offered is the last from the input stream. If the streamData does not form a complete atom, no rollover is expected or rollover would have honoured the atom boundaries, then the StreamAtomizer should treat that as an error. With rollover, consuming of the stream end data will signal the invoker to continue with data from next stream. When end of stream was reached but invoker has no streamData to send, it will invoke this method with zero-length data, which has to be consumed with a zero-length reply. @return the number of consumed bytes, 0 if the atomizer would need more data for a complete atom or -1 when no data was consumed at the moment but data might be consumed later on. The only situation where 0 is not an allowed return value is when endOfStreamFlag is set and streamData not empty.""" raise Exception('Interface method called') class AtomHandlerInterface(object): """This is the common interface of all handlers suitable for receiving log atoms.""" def receiveAtom(self, logAtom): """Receive a log atom from a source. @param atomData binary raw atom data @return True if this handler was really able to handle and process the atom. Depending on this information, the caller may decide if it makes sense passing the atom also to other handlers or to retry later. This behaviour has to be documented at each source implementation sending LogAtoms.""" raise Exception('Interface method called') from aminer.input.ByteStreamLineAtomizer import ByteStreamLineAtomizer from aminer.input.LogAtom import LogAtom from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory from aminer.input.SimpleMultisourceAtomSync import SimpleMultisourceAtomSync from aminer.input.SimpleUnparsedAtomHandler import SimpleUnparsedAtomHandler deb-build/root/usr/lib/logdata-anomaly-miner/aminer/input/LogAtom.py0000600000000000000000000000215313346670035024454 0ustar rootroot"""This module defines a log atom.""" class LogAtom: """This class defines a log atom used for parsing.""" def __init__(self, rawData, parserMatch, atomTime, source): """Create a log atom from scratch.""" self.rawData = rawData self.parserMatch = parserMatch self.atomTime = atomTime self.source = source def getParserMatch(self): """Get the parser match associated with this LogAtom. @return the match or None for (yet) unparsed LogAtoms.""" return self.parserMatch def setTimestamp(self, timestamp): """Update the default timestamp value associated with this LogAtom. 
The method can be called more than once to allow correction of fine-adjusting of timestamps by analysis filters after initial parsing procedure.""" self.atomTime = timestamp def getTimestamp(self): """Get the default timestamp value for this LogAtom. @return the timestamp as number of seconds since 1970.""" return self.atomTime def isParsed(self): """Check if this atom is parsed by checking if parserMatch object is attached.""" return self.parserMatch is not None deb-build/root/usr/lib/logdata-anomaly-miner/aminer/input/SimpleMultisourceAtomSync.py0000600000000000000000000000772113330315305030250 0ustar rootroot"""This module defines a handler that synchronizes different streams.""" import time from aminer.input import AtomHandlerInterface class SimpleMultisourceAtomSync(AtomHandlerInterface): """This class synchronizes different atom streams by forwarding the atoms only from the source delivering the oldest ones. This is done using the atom timestamp value. Atoms without a timestamp are forwarded immediately. When no atoms are received from a source for some time, no more atoms are expected from that source. This will allow forwarding of blocked atoms from other sources afterwards.""" def __init__(self, atomHandlerList, syncWaitTime=5): """@param atomHandlerList forward atoms to all handlers in the list, no matter if the logAtom was handled or not. @return true as soon as forwarding was attempted, no matter if one downstream handler really consumed the atom.""" self.atomHandlerList = atomHandlerList self.syncWaitTime = syncWaitTime # Last forwarded log atom timestamp self.lastForwardTimestamp = 0 # The dictionary containing the currently active sources. Each # entry is a list with two values: # * the largest timestamp of a LogAtom forwarded from this source # so far. # * the current LogAtom pending to be forwarded or None if all # atoms were forwarded self.sourcesDict = {} # The local clock time when blocking was enabled for any source. # Start in blocking mode to have chance to see atom from each # available source before forwarding the first ones. self.blockingEndTime = time.time()+self.syncWaitTime self.blockingSources = 0 self.timestampsUnsortedFlag = False def receiveAtom(self, logAtom): timestamp = logAtom.atomTime if timestamp is None: self.forwardAtom(logAtom) return True sourceInfo = self.sourcesDict.get(logAtom.source, None) if sourceInfo is None: sourceInfo = [timestamp, logAtom] self.sourcesDict[logAtom.source] = sourceInfo else: if timestamp < sourceInfo[0]: # Atoms not sorted, not our problem. Forward it immediately. self.timestampsUnsortedFlag = True self.forwardAtom(logAtom) return True if sourceInfo[1] is None: sourceInfo[1] = logAtom # Source information with the oldest pending atom. oldestSourceInfo = None hasIdleSourcesFlag = False for sourceInfo in self.sourcesDict.values(): if sourceInfo[1] is None: hasIdleSourcesFlag = True continue if oldestSourceInfo is None: oldestSourceInfo = sourceInfo continue if sourceInfo[1].atomTime < oldestSourceInfo[1].atomTime: oldestSourceInfo = sourceInfo if self.blockingEndTime != 0: # We cannot do anything while blocking to catch more atoms. if self.blockingEndTime > time.time(): return False # Blocking has expired, cleanup the blockers. 
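# Illustrative note: sources that stayed idle for the whole blocking
# window (no pending atom) are assumed to have stopped delivering and are
# removed here, so they no longer hold back atoms from the other sources.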
expiredSources = [] for source, sourceInfo in self.sourcesDict.items(): if sourceInfo[1] is None: expiredSources.append(source) for source in expiredSources: del self.sourcesDict[source] self.blockingEndTime = 0 self.blockingSources = 0 hasIdleSourcesFlag = False if hasIdleSourcesFlag: # We cannot let this item pass. Before entering blocking state, # give all other sources also the chance to submit an atom. if self.blockingSources == len(self.sourcesDict): self.blockingEndTime = time.time()+self.syncWaitTime else: self.blockingSources += 1 return False # No idle sources, just forward atom from the oldest one if that # is really the currently active source. if logAtom != oldestSourceInfo[1]: return False self.forwardAtom(logAtom) oldestSourceInfo[1] = None if timestamp > oldestSourceInfo[0]: oldestSourceInfo[0] = timestamp self.blockingSources = 0 return True def forwardAtom(self, logAtom): """Forward atom to all atom handlers.""" for handler in self.atomHandlerList: handler.receiveAtom(logAtom) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/input/SimpleUnparsedAtomHandler.py0000600000000000000000000000130113330315706030150 0ustar rootroot"""This module defines a handler that forwards unparsed atoms to the event handlers.""" from aminer.input import AtomHandlerInterface class SimpleUnparsedAtomHandler(AtomHandlerInterface): """Handlers of this class will just forward received unparsed atoms to the registered event handlers.""" def __init__(self, eventHandlers): self.eventHandlers = eventHandlers def receiveAtom(self, logAtom): """Receive an unparsed atom to create events for each.""" if logAtom.isParsed(): return False for listener in self.eventHandlers: listener.receiveEvent('Input.UnparsedAtomHandler', \ 'Unparsed atom received', [logAtom.rawData], logAtom, self) return True deb-build/root/usr/lib/logdata-anomaly-miner/aminer/input/LogStream.py0000600000000000000000000004152013352411140024773 0ustar rootroot"""This module contains interfaces and classes for logdata resource handling and combining them to resumable virtual LogStream objects.""" import base64 import errno import hashlib import os import socket import stat import sys from aminer.util import SecureOSFunctions from aminer.util import encodeByteStringAsString class LogDataResource(object): """This is the superinterface of each logdata resource monitored by AMiner. The interface is designed in a way, that instances of same subclass can be used both on AMiner parent process side for keeping track of the resources and forwarding the file descriptors to the child, but also on child side for the same purpose. The only difference is, that on child side, the stream reading and read continuation features are used also. After creation on child side, this is the sole place for reading and closing the streams. An external process may use the file descriptor only to wait for input via select.""" def __init__( self, logResourceName, logStreamFd, defaultBufferSize=1 << 16, repositioningData=None): """Create a new LogDataResource. Object creation must not touch the logStreamFd or read any data, unless repositioningData was given. In the later case, the stream has to support seek operation to reread data. @param logResourceName the unique encoded name of this source as byte array. @param logStreamFd the stream for reading the resource or -1 if not yet opened. 
@param repositioningData if not None, attemt to position the the stream using the given data.""" raise Exception('Interface method called') def open(self, reopenFlag=False): """Open the given resource. @param reopenFlag when True, attempt to reopen the same resource and check if it differs from the previously opened one. @raise Exception if valid logStreamFd was already provided, is still open and reopenFlag is False. @raise OSError when opening failed with unexpected error. @return True if the resource was really opened or False if opening was not yet possible but should be attempted again.""" raise Exception('Interface method called') def getResourceName(self): """Get the name of this log resoruce.""" raise Exception('Interface method called') def getFileDescriptor(self): """Get the file descriptor of this open resource.""" raise Exception('Interface method called') def fillBuffer(self): """Fill the buffer data of this resource. The repositioning information is not updated, updatePosition() has to be used. @return the number of bytes read or -1 on error or end.""" raise Exception('Interface method called') def updatePosition(self, length): """Update the positioning information and discard the buffer data afterwards.""" raise Exception('Interface method called') def getRepositioningData(self): """Get the data for repositioning the stream. The returned structure has to be JSON serializable.""" raise Exception('Interface method called') def close(self): """Close this logdata resource. Data access methods will not work any more afterwards.""" raise Exception('Interface method called') class FileLogDataResource(LogDataResource): """This class defines a single log data resource using an underlying file accessible via the file descriptor. The characteristics of this type of resource is, that reopening and repositioning of the stream has to be possible.""" def __init__( self, logResourceName, logStreamFd, defaultBufferSize=1 << 16, repositioningData=None): """Create a new file type resource. @param logResourceName the unique name of this source as bytes array, has to start with "file://" before the file path. @param logStreamFd the stream for reading the resource or -1 if not yet opened. @param repositioningData if not None, attemt to position the the stream using the given data.""" if not logResourceName.startswith(b'file://'): raise Exception('Attempting to create different type resource as file') self.logResourceName = logResourceName self.logFileFd = logStreamFd self.statData = None if self.logFileFd >= 0: self.statData = os.fstat(logStreamFd) self.buffer = b'' self.defaultBufferSize = defaultBufferSize self.totalConsumedLength = 0 # Create a hash for repositioning. There is no need to be cryptographically # secure here: if upstream can manipulate the content, to provoke # hash collisions, correct positioning would not matter anyway. 
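# Illustrative note: repositioningData, as produced by
# getRepositioningData() below, has the form
# [inodeNumber, consumedLength, base64EncodedMd5DigestOfConsumedData].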
self.repositioningDigest = hashlib.md5() if (logStreamFd != -1) and (repositioningData != None): if repositioningData[0] != self.statData.st_ino: print('Not attempting to reposition on %s,' \ 'inode number mismatch' % encodeByteStringAsString(self.logResourceName), file=sys.stderr) elif repositioningData[1] > self.statData.st_size: print('Not attempting to reposition on %s,' \ 'file size too small' % encodeByteStringAsString(self.logResourceName), file=sys.stderr) else: hashAlgo = hashlib.md5() length = repositioningData[1] while length != 0: block = None if length < defaultBufferSize: block = os.read(self.logFileFd, length) else: block = os.read(self.logFileFd, defaultBufferSize) if not block: print('Not attempting to reposition ' \ 'on %s, file shrunk while reading' % encodeByteStringAsString(self.logResourceName), file=sys.stderr) break hashAlgo.update(block) length -= len(block) digest = hashAlgo.digest() if length == 0: if digest == base64.b64decode(repositioningData[2]): # Repositioning is OK, keep current digest and length data. self.totalConsumedLength = repositioningData[1] self.repositioningDigest = hashAlgo else: print('Not attempting to reposition ' \ 'on %s, digest changed' % encodeByteStringAsString(self.logResourceName), file=sys.stderr) length = -1 if length != 0: # Repositioning failed, go back to the beginning of the stream. os.lseek(self.logFileFd, 0, os.SEEK_SET) def open(self, reopenFlag=False): """Open the given resource. @param reopenFlag when True, attempt to reopen the same resource and check if it differs from the previously opened one. @raise Exception if valid logStreamFd was already provided, is still open and reopenFlag is False. @raise OSError when opening failed with unexpected error. @return True if the resource was really opened or False if opening was not yet possible but should be attempted again.""" if not reopenFlag and (self.logFileFd != -1): raise Exception('Cannot reopen stream still open when not instructed to do so') logFileFd = -1 statData = None try: logFileFd = SecureOSFunctions.secureOpenFile( self.logResourceName[7:], os.O_RDONLY) statData = os.fstat(logFileFd) except OSError as openOsError: if logFileFd != -1: os.close(logFileFd) if openOsError.errno == errno.ENOENT: return False raise if not stat.S_ISREG(statData.st_mode): os.close(logFileFd) raise Exception('Attempting to open non-regular file %s ' \ 'as file' % encodeByteStringAsString(self.logResourceName)) if (reopenFlag and (self.statData != None) and (statData.st_ino == self.statData.st_ino) and (statData.st_dev == self.statData.st_dev)): # Reopening was requested, but we would reopen the file already # opened, which is of no use. os.close(logFileFd) return False # This is a new file or a successful reopen attempt. self.logFileFd = logFileFd self.statData = statData return True def getResourceName(self): """Get the name of this log resoruce.""" return self.logResourceName def getFileDescriptor(self): """Get the file descriptor of this open resource.""" return self.logFileFd def fillBuffer(self): """Fill the buffer data of this resource. The repositioning information is not updated, updatePosition() has to be used. 
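Illustrative usage: callers typically invoke fillBuffer(), hand self.buffer to an atomizer and then call updatePosition() with the number of bytes the atomizer reports as consumed (see LogStream.handleStream() below).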
@return the number of bytes read or -1 on error or end.""" data = os.read(self.logFileFd, self.defaultBufferSize) self.buffer += data return len(data) def updatePosition(self, length): """Update the positioning information and discard the buffer data afterwards.""" self.repositioningDigest.update(self.buffer[:length]) self.totalConsumedLength += length self.buffer = self.buffer[length:] def getRepositioningData(self): """Get the data for repositioning the stream. The returned structure has to be JSON serializable.""" return [ self.statData.st_ino, self.totalConsumedLength, base64.b64encode(self.repositioningDigest.digest())] def close(self): os.close(self.logFileFd) self.logFileFd = -1 class UnixSocketLogDataResource(LogDataResource): """This class defines a single log data resource connecting to a local UNIX socket. The characteristics of this type of resource is, that reopening works only after end of stream of was reached.""" def __init__( self, logResourceName, logStreamFd, defaultBufferSize=1 << 16, repositioningData=None): """Create a new unix socket type resource. @param logResourceName the unique name of this source as byte array, has to start with "unix://" before the file path. @param logStreamFd the stream for reading the resource or -1 if not yet opened. @param repositioningData has to be None for this type of resource.""" if not logResourceName.startswith(b'unix://'): raise Exception('Attempting to create different type resource as unix') self.logResourceName = logResourceName self.logStreamFd = logStreamFd self.buffer = '' self.defaultBufferSize = defaultBufferSize self.totalConsumedLength = 0 def open(self, reopenFlag=False): """Open the given resource. @param reopenFlag when True, attempt to reopen the same resource and check if it differs from the previously opened one. @raise Exception if valid logStreamFd was already provided, is still open and reopenFlag is False. @raise OSError when opening failed with unexpected error. @return True if the resource was really opened or False if opening was not yet possible but should be attempted again.""" if reopenFlag: if self.logStreamFd != -1: return False elif self.logStreamFd != -1: raise Exception('Cannot reopen stream still open when not instructed to do so') logSocket = None try: logSocket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) logSocket.connect(self.logResourceName[7:]) except socket.error as socketError: if logSocket != None: logSocket.close() if (socketError.errno == errno.ENOENT) or (socketError.errno == errno.ECONNREFUSED): return False # Transform exception to OSError as caller does not expect something # else. raise OSError(socketError[0], socketError[1]) self.logStreamFd = os.dup(logSocket.fileno()) logSocket.close() return True def getResourceName(self): """Get the name of this log resoruce.""" return self.logResourceName def getFileDescriptor(self): """Get the file descriptor of this open resource.""" return self.logStreamFd def fillBuffer(self): """Fill the buffer data of this resource. The repositioning information is not updated, updatePosition() has to be used. @return the number of bytes read or -1 on error or end.""" data = os.read(self.logStreamFd, self.defaultBufferSize) self.buffer += data return len(data) def updatePosition(self, length): """Update the positioning information and discard the buffer data afterwards.""" self.totalConsumedLength += length self.buffer = self.buffer[length:] def getRepositioningData(self): """Get the data for repositioning the stream. 
The returned structure has to be JSON serializable.""" return None def close(self): os.close(self.logStreamFd) self.logStreamFd = -1 class LogStream(object): """This class defines a continuous stream of logging data from a given source. This class also handles rollover from one file descriptor to a new one.""" def __init__(self, logDataResource, streamAtomizer): """Create a new logstream with an initial logDataResource. @param streamAtomizer the atomizer to forward data to.""" # The resource currently processed. Might also be None when previous # resource was read till end and no rollover to new one had occured. self.logDataResource = logDataResource self.streamAtomizer = streamAtomizer # Last reading state, those are the same as returned by StreamAtomizer # consumeData() method. Start with state 0 (more data required). self.lastConsumeState = 0 self.nextResources = [] def addNextResource(self, nextLogDataResource): """Roll over from one fd to another one pointing to the newer version of the same file. This will also change reading behaviour of current resource to await EOF or stop as soon as first blocking read does not return any data.""" # Just append the resource to the list of next resources. The # next read operation without any input from the primary resource # will pick it up automatically. if self.logDataResource is None: self.logDataResource = nextLogDataResource else: self.nextResources.append(nextLogDataResource) def handleStream(self): """Handle data from this stream by forwarding it to the atomizer. @return the file descriptor to monitoring for new input or -1 if there is no new data or atomizer was not yet ready to consume data. Handling should be tried again later on.""" if self.logDataResource is None: return -1 if self.lastConsumeState == 0: # We need more data, read it. readLength = self.logDataResource.fillBuffer() if readLength == -1: self.lastConsumeState = self.rollOver() return self.lastConsumeState if readLength == 0: if not self.nextResources: # There is just no input, but we still need more since last round # as indicated by lastConsumeState. We would not have been called # if this is a blocking stream, so this must be the preliminiary # end of the file. Tell caller to wait and retry read later on. # Keep lastConsumeState value, consume still wants more data. return -1 # This seems to EOF for rollover. self.lastConsumeState = self.rollOver() return self.lastConsumeState # So there was something read, process it the same way as if data # was already available in previous round. self.lastConsumeState = self.streamAtomizer.consumeData( self.logDataResource.buffer, False) if self.lastConsumeState < 0: return -1 if self.lastConsumeState != 0: self.logDataResource.updatePosition(self.lastConsumeState) return self.logDataResource.getFileDescriptor() def rollOver(self): """End reading of the current resource and switch to the next. This method does not handle lastConsumeState, that has to be done outside. @return state in same manner as handleStream()""" consumedLength = self.streamAtomizer.consumeData( self.logDataResource.buffer, True) if consumedLength < 0: # Consumer is not ready to consume yet. Retry later on. return -1 if consumedLength != len(self.logDataResource.buffer): if consumedLength != 0: # Some data consumed, unclear why not all when already at end # of stream. Retry again immediately to find out why. 
self.logDataResource.updatePosition(consumedLength) return self.logDataResource.getFileDescriptor() # This is a clear protocol violation (see StreamAtomizer documentaion): # When at EOF, 0 is no valid return value. print('FATAL: Procotol violation by %s detected, ' \ 'flushing data' % self.streamAtomizer.__class__.__name__, file=sys.stderr) consumedLength = len(self.logDataResource.buffer) # Everything consumed, so now ready for rollover. self.logDataResource.updatePosition(consumedLength) self.logDataResource.close() if not self.nextResources: self.logDataResource = None return -1 self.logDataResource = self.nextResources[0] del self.nextResources[0] return self.logDataResource.getFileDescriptor() def getCurrentFd(self): """Get the file descriptor for reading the currently active logdata resource.""" if self.logDataResource is None: return -1 return self.logDataResource.getFileDescriptor() def getRepositioningData(self): """Get the respositioning information from the currently active underlying logdata resource.""" if self.logDataResource is None: return None return self.logDataResource.getRepositioningData() deb-build/root/usr/lib/logdata-anomaly-miner/aminer/__init__.py0000600000000000000000000000000013330037067023472 0ustar rootrootdeb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/0000755000000000000000000000000013354634416023235 5ustar rootrootdeb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/TimestampCorrectionFilters.py0000600000000000000000000000233113330052114031101 0ustar rootroot"""This file collects various classes useful to filter and correct the timestamp associated with a received parsed atom.""" from aminer.input import AtomHandlerInterface class SimpleMonotonicTimestampAdjust(AtomHandlerInterface): """Handlers of this class compare the timestamp of a newly received atom with the largest timestamp seen so far. When below, the timestamp of this atom is adjusted to the largest value seen, otherwise the largest value seen is updated.""" def __init__(self, subhandlerList, stopWhenHandledFlag=False): self.subhandlerList = subhandlerList self.stopWhenHandledFlag = stopWhenHandledFlag self.latestTimestampSeen = 0 def receiveAtom(self, logAtom): """Pass the atom to the subhandlers. @return false when no subhandler was able to handle the atom.""" timestamp = logAtom.getTimestamp() if timestamp < self.latestTimestampSeen: logAtom.setTimestamp(self.latestTimestampSeen) else: self.latestTimestampSeen = timestamp result = False for handler in self.subhandlerList: handlerResult = handler.receiveAtom(logAtom) if handlerResult is True: result = True if self.stopWhenHandledFlag: break return result deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/__init__.py0000644000000000000000000000247113352711250025340 0ustar rootroot"""This file contains interface definition useful implemented by classes in this directory and for use from code outside this directory. All classes are defined in separate files, only the namespace references are added here to simplify the code. No generic interfaces here yet. 
Add also the namespace references to classes defined in this directory.""" # AtomFilters.py from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathListValueDetector from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector # Rules.py from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector # TimestampCorrectionFilters.py from aminer.analysis.WhitelistViolationDetector import WhitelistViolationDetector deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/NewMatchPathValueComboDetector.py0000600000000000000000000001174713330040250031560 0ustar rootroot"""This file defines the basic NewMatchPathValueComboDetector detector to extract values from LogAtoms and check, if the value combination was already seen before.""" import time from aminer import AMinerConfig from aminer.AnalysisChild import AnalysisContext from aminer.events import EventSourceInterface from aminer.input import AtomHandlerInterface from aminer.util import PersistencyUtil from aminer.util import TimeTriggeredComponentInterface class NewMatchPathValueComboDetector( AtomHandlerInterface, TimeTriggeredComponentInterface, EventSourceInterface): """This class creates events when a new value combination for a given list of match data pathes were found.""" def __init__( self, aminerConfig, targetPathList, anomalyEventHandlers, peristenceId='Default', allowMissingValuesFlag=False, autoIncludeFlag=False): """Initialize the detector. This will also trigger reading or creation of persistence storage location. @param targetPathList the list of values to extract from each match to create the value combination to be checked. @param allowMissingValuesFlag when set to True, the detector will also use matches, where one of the pathes from targetPathList does not refer to an existing parsed data object. @param autoIncludeFlag when set to True, this detector will report a new value only the first time before including it in the known values set automatically.""" self.targetPathList = targetPathList self.anomalyEventHandlers = anomalyEventHandlers self.allowMissingValuesFlag = allowMissingValuesFlag self.autoIncludeFlag = autoIncludeFlag self.persistenceFileName = AMinerConfig.buildPersistenceFileName( aminerConfig, self.__class__.__name__, peristenceId) self.nextPersistTime = None self.loadPersistencyData() PersistencyUtil.addPersistableComponent(self) def loadPersistencyData(self): """Load the persistency data from storage.""" persistenceData = PersistencyUtil.loadJson(self.persistenceFileName) if persistenceData is None: self.knownValuesSet = set() else: # Set and tuples were stored as list of lists. Transform the inner # lists to tuples to allow hash operation needed by set. self.knownValuesSet = set([tuple(record) for record in persistenceData]) def receiveAtom(self, logAtom): """Receive on parsed atom and the information about the parser match. 
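For example (illustrative, path names are hypothetical): with targetPathList ['/model/username', '/model/sourceIp'] each parsed atom yields a (username, sourceIp) value tuple that is compared against the set of combinations seen so far.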
@return True if a value combination was extracted and checked against the list of known combinations, no matter if the checked values were new or not.""" matchDict = logAtom.parserMatch.getMatchDictionary() matchValueList = [] for targetPath in self.targetPathList: matchElement = matchDict.get(targetPath, None) if matchElement is None: if not self.allowMissingValuesFlag: return False matchValueList.append(None) else: matchValueList.append(matchElement.matchObject) matchValueTuple = tuple(matchValueList) if matchValueTuple not in self.knownValuesSet: if self.autoIncludeFlag: self.knownValuesSet.add(matchValueTuple) if self.nextPersistTime is None: self.nextPersistTime = time.time()+600 for listener in self.anomalyEventHandlers: listener.receiveEvent( 'Analysis.%s' % self.__class__.__name__, 'New value combination for path(es) %s: %s' % ( ', '.join(self.targetPathList), repr(matchValueTuple)), [logAtom.rawData], (logAtom, matchValueTuple), self) return True def getTimeTriggerClass(self): """Get the trigger class this component should be registered for. This trigger is used only for persistency, so real-time triggering is needed.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """Check current ruleset should be persisted""" if self.nextPersistTime is None: return 600 delta = self.nextPersistTime-triggerTime if delta < 0: self.doPersist() delta = 600 return delta def doPersist(self): """Immediately write persistence data to storage.""" PersistencyUtil.storeJson( self.persistenceFileName, list(self.knownValuesSet)) self.nextPersistTime = None def whitelistEvent( self, eventType, sortedLogLines, eventData, whitelistingData): """Whitelist an event generated by this source using the information emitted when generating the event. @return a message with information about whitelisting @throws Exception when whitelisting of this special event using given whitelistingData was not possible.""" if eventType != 'Analysis.%s' % self.__class__.__name__: raise Exception('Event not from this source') if whitelistingData != None: raise Exception('Whitelisting data not understood by this detector') self.knownValuesSet.add(eventData[1]) return 'Whitelisted path(es) %s with %s in %s' % ( ', '.join(self.targetPathList), eventData[1], sortedLogLines[0]) ././@LongLink0000644000000000000000000000014700000000000011605 Lustar rootrootdeb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/EnhancedNewMatchPathValueComboDetector.pydeb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/EnhancedNewMatchPathValueComboDetector.0000644000000000000000000001255313326562314032660 0ustar rootroot"""This file defines the EnhancedNewMatchPathValueComboDetector detector to extract values from LogAtoms and check, if the value combination was already seen before.""" import time from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector from aminer.util import PersistencyUtil class EnhancedNewMatchPathValueComboDetector(NewMatchPathValueComboDetector): """This class creates events when a new value combination for a given list of match data pathes were found. It is similar to the NewMatchPathValueComboDetector basic detector but also provides support for storing meta information about each detected value combination, e.g. * the first time a tuple was detected using the LogAtom default timestamp. * the last time a tuple was seen * the number of times the tuple was seen * user data for annotation. 
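Internally each known value tuple maps to a list of the form [firstSeenTimestamp, lastSeenTimestamp, seenCount, userData] (see loadPersistencyData() and receiveAtom() below).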
Due to the additional features, this detector is slower than the basic detector.""" def __init__( self, aminerConfig, targetPathList, anomalyEventHandlers, peristenceId='Default', allowMissingValuesFlag=False, autoIncludeFlag=False, tupleTransformationFunction=None): """Initialize the detector. This will also trigger reading or creation of persistence storage location. @param targetPathList the list of values to extract from each match to create the value combination to be checked. @param allowMissingValuesFlag when set to True, the detector will also use matches, where one of the pathes from targetPathList does not refer to an existing parsed data object. @param autoIncludeFlag when set to True, this detector will report a new value only the first time before including it in the known values set automatically. @param tupleTransformationFunction when not None, this function will be invoked on each extracted value combination list to transform it. It may modify the list directly or create a new one to return it.""" super(EnhancedNewMatchPathValueComboDetector, self).__init__( aminerConfig, targetPathList, anomalyEventHandlers, peristenceId, allowMissingValuesFlag, autoIncludeFlag) self.tupleTransformationFunction = tupleTransformationFunction def loadPersistencyData(self): """Load the persistency data from storage.""" self.knownValuesDict = {} persistenceData = PersistencyUtil.loadJson(self.persistenceFileName) if persistenceData != None: # Dictionary and tuples were stored as list of lists. Transform # the first lists to tuples to allow hash operation needed by set. for valueTuple, extraData in persistenceData: self.knownValuesDict[tuple(valueTuple)] = extraData def receiveAtom(self, logAtom): """Receive on parsed atom and the information about the parser match. @return True if a value combination was extracted and checked against the list of known combinations, no matter if the checked values were new or not.""" matchDict = logAtom.parserMatch.getMatchDictionary() matchValueList = [] for targetPath in self.targetPathList: matchElement = matchDict.get(targetPath, None) if matchElement is None: if not self.allowMissingValuesFlag: return False matchValueList.append(None) else: matchValueList.append(matchElement.matchObject) if self.tupleTransformationFunction != None: matchValueList = self.tupleTransformationFunction(matchValueList) matchValueTuple = tuple(matchValueList) currentTimestamp = logAtom.getTimestamp() extraData = self.knownValuesDict.get(matchValueTuple, None) if extraData != None: extraData[1] = currentTimestamp extraData[2] += 1 else: if self.autoIncludeFlag: self.knownValuesDict[matchValueTuple] = [ currentTimestamp, currentTimestamp, 1, None] if self.nextPersistTime is None: self.nextPersistTime = time.time()+600 for listener in self.anomalyEventHandlers: listener.receiveEvent( 'Analysis.%s' % self.__class__.__name__, 'New value combination for path(es) %s: %s' % ( ', '.join(self.targetPathList), repr(matchValueTuple)), [logAtom.rawData], (logAtom, matchValueTuple), self) return True def doPersist(self): """Immediately write persistence data to storage.""" persistencyData = [] for dictRecord in self.knownValuesDict.items(): persistencyData.append(dictRecord) PersistencyUtil.storeJson(self.persistenceFileName, persistencyData) self.nextPersistTime = None def whitelistEvent( self, eventType, sortedLogLines, eventData, whitelistingData): """Whitelist an event generated by this source using the information emitted when generating the event. 
@return a message with information about whitelisting @throws Exception when whitelisting of this special event using given whitelistingData was not possible.""" if eventType != 'Analysis.%s' % self.__class__.__name__: raise Exception('Event not from this source') if whitelistingData != None: raise Exception('Whitelisting data not understood by this detector') currentTimestamp = eventData[0].getTimestamp() self.knownValuesDict[eventData[1]] = [ currentTimestamp, currentTimestamp, 1, None] return 'Whitelisted path(es) %s with %s in %s' % ( ', '.join(self.targetPathList), eventData[1], sortedLogLines[0]) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/TimeCorrelationViolationDetector.py0000600000000000000000000002520413347665470032267 0ustar rootroot"""This module defines a detector for time correlation rules.""" import time from aminer import AMinerConfig from aminer.AnalysisChild import AnalysisContext from aminer.input import AtomHandlerInterface from aminer.util import LogarithmicBackoffHistory from aminer.util import PersistencyUtil from aminer.util import TimeTriggeredComponentInterface from aminer.analysis import Rules class TimeCorrelationViolationDetector(AtomHandlerInterface, TimeTriggeredComponentInterface): """This class creates events when one of the given time correlation rules is violated. This is used to implement checks as depicted in http://dx.doi.org/10.1016/j.cose.2014.09.006""" def __init__(self, aminerConfig, ruleset, anomalyEventHandlers, peristenceId='Default'): """Initialize the detector. This will also trigger reading or creation of persistence storage location. @param ruleset a list of MatchRule rules with appropriate CorrelationRules attached as actions.""" self.eventClassificationRuleset = ruleset self.anomalyEventHandlers = anomalyEventHandlers self.nextPersistTime = time.time()+600.0 self.historyAEvents = [] self.historyBEvents = [] eventCorrelationSet = set() for rule in self.eventClassificationRuleset: if rule.matchAction.artefactARules is not None: eventCorrelationSet |= set(rule.matchAction.artefactARules) if rule.matchAction.artefactBRules is not None: eventCorrelationSet |= set(rule.matchAction.artefactBRules) self.eventCorrelationRuleset = list(eventCorrelationSet) PersistencyUtil.addPersistableComponent(self) self.persistenceFileName = AMinerConfig.buildPersistenceFileName( aminerConfig, 'TimeCorrelationViolationDetector', peristenceId) # persistenceData = PersistencyUtil.loadJson(self.persistenceFileName) # if persistenceData is None: # self.knownPathSet = set() # else: # self.knownPathSet = set(persistenceData) def receiveAtom(self, logAtom): """Receive a parsed atom and check all the classification rules, that will trigger correlation rule evaluation and event triggering on violations.""" for rule in self.eventClassificationRuleset: rule.match(logAtom.parserMatch) def getTimeTriggerClass(self): """Get the trigger class this component should be registered for. This trigger is used mainly for persistency, so real-time triggering is needed. Use also real-time triggering for analysis: usually events for violations (timeouts) are generated when receiving newer atoms. 
This is just the fallback periods of input silence.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """Check for any rule violations and if the current ruleset should be persisted.""" # Persist the state only quite infrequently: As most correlation # rules react in timeline of seconds, the persisted data will most # likely be unsuitable to catch lost events. So persistency is # mostly to capture the correlation rule context, e.g. the history # of loglines matched before. if self.nextPersistTime-triggerTime < 0: self.doPersist() # Check all correlation rules, generate single events for each # violated rule, possibly containing multiple records. As we might # be processing historic data, the timestamp last seen is unknown # here. Hence rules not receiving newer events might not notice # for a long time, that they hold information about correlation # impossible to fulfil. Take the newest timestamp of any rule # and use it for checking. newestTimestamp = 0.0 for rule in self.eventCorrelationRuleset: newestTimestamp = max(newestTimestamp, rule.lastTimestampSeen) for rule in self.eventCorrelationRuleset: checkResult = rule.checkStatus(newestTimestamp) if checkResult is None: continue for listener in self.anomalyEventHandlers: listener.receiveEvent('Analysis.%s' % self.__class__.__name__, \ 'Correlation rule "%s" violated' % rule.id, checkResult[1], \ checkResult[0], self) return 10.0 def doPersist(self): """Immediately write persistence data to storage.""" # PersistencyUtil.storeJson(self.persistenceFileName, list(self.knownPathSet)) self.nextPersistTime = time.time()+600.0 class EventClassSelector(Rules.MatchAction): """This match action selects one event class by adding it to to a MatchRule. Itthen triggers the appropriate CorrelationRules.""" def __init__(self, actionId, artefactARules, artefactBRules): self.actionId = actionId self.artefactARules = artefactARules self.artefactBRules = artefactBRules def matchAction(self, logAtom): """This method is invoked if a rule rule has matched. @param logAtom the parser MatchElement that was also matching the rules.""" if self.artefactARules is not None: for aRule in self.artefactARules: aRule.updateArtefactA(self, logAtom) if self.artefactBRules is not None: for bRule in self.artefactBRules: bRule.updateArtefactB(self, logAtom) class CorrelationRule: """This class defines a correlation rule to match artefacts A and B, where a hidden event A* always triggers at least one artefact A and the the hidden event B*, thus triggering also at least one artefact B.""" def __init__(self, ruleId, minTimeDelta, maxTimeDelta, maxArtefactsAForSingleB=1, artefactMatchParameters=None): """Create the correlation rule. @param artefactMatchParameters if not none, two artefacts A and B will be only treated as correlated when the all the parsed artefact attributes identified by the list of attribute path tuples match. @param minTimeDelta minimal delta in seconds, that artefact B may be observed after artefact A. Negative values are allowed as artefact B may be found before A. 
""" self.ruleId = ruleId self.minTimeDelta = minTimeDelta self.maxTimeDelta = maxTimeDelta self.maxArtefactsAForSingleB = maxArtefactsAForSingleB self.artefactMatchParameters = artefactMatchParameters self.historyAEvents = [] self.historyBEvents = [] self.lastTimestampSeen = 0.0 self.correlationHistory = LogarithmicBackoffHistory(10) def updateArtefactA(self, selector, parserMatch): """Append entry to the event history A.""" historyEntry = self.prepareHistoryEntry(selector, parserMatch) # FIXME: Check if event A could be discarded immediately. self.historyAEvents.append(historyEntry) def updateArtefactB(self, selector, parserMatch): """Append entry to the event history B.""" historyEntry = self.prepareHistoryEntry(selector, parserMatch) # FIXME: Check if event B could be discarded immediately. self.historyBEvents.append(historyEntry) def checkStatus(self, newestTimestamp, maxViolations=20): """@return None if status is OK. Returns a tuple containing a descriptive message and a list of violating log data lines on error.""" # FIXME: This part of code would be good target to be implemented # as native library with optimized algorithm in future. aPos = 0 bPosStart = 0 for aPos in range(0, len(self.historyAEvents)): aEvent = self.historyAEvents[aPos] aEventTime = aEvent[0] if newestTimestamp-aEventTime <= self.maxTimeDelta: # This event is so new, that timewindow for related event has # not expired yet. break for bPos in range(bPosStart, len(self.historyBEvents)): bEvent = self.historyBEvents[bPos] if bEvent is None: continue bEventTime = bEvent[0] delta = bEventTime-aEventTime if delta < self.minTimeDelta: # See if too early, if yes go to next element. As we will not # check again any older aEvents in this loop, skip all bEvents # up to this position in future runs. bPosStart = bPos+1 continue # Too late, no other bEvent may match this aEvent if delta > self.maxTimeDelta: break # So time range is OK, see if match parameters are also equal. checkPos = 4 for checkPos in range(4, len(aEvent)): if aEvent[checkPos] != bEvent[checkPos]: break if checkPos != len(aEvent): continue # We found the match. Mark aEvent as done. self.historyAEvents[aPos] = None # See how many eEvents this bEvent might collect. Clean it also # when limit was reached. bEvent[1] += 1 if bEvent[1] == self.maxArtefactsAForSingleB: self.historyBEvents[bPos] = None # We want to keep a history of good matches to ease diagnosis # of correlation failures. Keep information about current line # for reference. self.correlationHistory.addObject((aEvent[3].matchElement.matchString, aEvent[2].id, \ bEvent[3].matchElement.matchString, bEvent[2].id)) aPos += 1 break # After checking all aEvents before aPos were cleared, otherwise # they violate a correlation rule. checkRange = aPos violationLogs = [] violationMessage = '' numViolations = 0 for aPos in range(0, checkRange): aEvent = self.historyAEvents[aPos] if aEvent is None: continue numViolations += 1 if numViolations > maxViolations: continue violationLine = aEvent[3].matchElement.matchString violationMessage += 'FAIL: \"%s\" (%s)\n' % (violationLine, aEvent[2].id) violationLogs.append(violationLine) if numViolations > maxViolations: violationMessage += '... 
(%d more)\n' % (numViolations-maxViolations) if numViolations != 0: violationMessage += 'Historic examples:\n' for record in self.correlationHistory.getHistory(): violationMessage += ' "%s" (%s) ==> "%s" (%s)\n' % record # Prune out all handled event records self.historyAEvents = self.historyAEvents[checkRange:] self.historyBEvents = self.historyBEvents[bPosStart:] if numViolations == 0: return None return (violationMessage, violationLogs) def prepareHistoryEntry(self, selector, parserMatch): """Return a history entry for a parser match.""" length = 4 if self.artefactMatchParameters is not None: length += len(self.artefactMatchParameters) result = [None]*length result[0] = parserMatch.getDefaultTimestamp() result[1] = 0 result[2] = selector result[3] = parserMatch if result[0] < self.lastTimestampSeen: raise Exception('Unsorted!') self.lastTimestampSeen = result[0] if self.artefactMatchParameters is not None: pos = 4 vDict = parserMatch.getMatchDictionary() for paramPath in self.artefactMatchParameters: matchElement = vDict.get(paramPath, None) if matchElement is not None: result[pos] = matchElement.matchObject pos += 1 return result deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/NewMatchPathValueDetector.py0000600000000000000000000000534613352177136030620 0ustar rootroot"""This module defines a detector for new values in a data path.""" import time from aminer import AMinerConfig from aminer.AnalysisChild import AnalysisContext from aminer.input import AtomHandlerInterface from aminer.util import PersistencyUtil from aminer.util import TimeTriggeredComponentInterface class NewMatchPathValueDetector(AtomHandlerInterface, TimeTriggeredComponentInterface): """This class creates events when new values for a given data path were found.""" def __init__(self, aminerConfig, targetPathList, anomalyEventHandlers, \ peristenceId='Default', autoIncludeFlag=False): """Initialize the detector. This will also trigger reading or creation of persistence storage location.""" self.targetPathList = targetPathList self.anomalyEventHandlers = anomalyEventHandlers self.autoIncludeFlag = autoIncludeFlag self.nextPersistTime = None PersistencyUtil.addPersistableComponent(self) self.persistenceFileName = AMinerConfig.buildPersistenceFileName( aminerConfig, self.__class__.__name__, peristenceId) persistenceData = PersistencyUtil.loadJson(self.persistenceFileName) if persistenceData is None: self.knownPathSet = set() else: self.knownPathSet = set(persistenceData) def receiveAtom(self, logAtom): matchDict = logAtom.parserMatch.getMatchDictionary() for targetPath in self.targetPathList: match = matchDict.get(targetPath, None) if match is None: continue if match.matchObject not in self.knownPathSet: if self.autoIncludeFlag: self.knownPathSet.add(match.matchObject) if self.nextPersistTime is None: self.nextPersistTime = time.time()+600 for listener in self.anomalyEventHandlers: listener.receiveEvent('Analysis.%s' % self.__class__.__name__, \ 'New value for path %s: %s ' % (targetPath, repr(match.matchObject)), \ [logAtom.rawData.decode], logAtom, self) def getTimeTriggerClass(self): """Get the trigger class this component should be registered for. 
This trigger is used only for persistency, so real-time triggering is needed.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """Check current ruleset should be persisted""" if self.nextPersistTime is None: return 600 delta = self.nextPersistTime-triggerTime if delta < 0: PersistencyUtil.storeJson(self.persistenceFileName, list(self.knownPathSet)) self.nextPersistTime = None delta = 600 return delta def doPersist(self): """Immediately write persistence data to storage.""" PersistencyUtil.storeJson(self.persistenceFileName, list(self.knownPathSet)) self.nextPersistTime = None deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/MatchValueAverageChangeDetector.py0000600000000000000000000001551713354634416031734 0ustar rootroot"""This module defines a detector that reports diverges from an average.""" import time from aminer import AMinerConfig from aminer.AnalysisChild import AnalysisContext from aminer.input import AtomHandlerInterface from aminer.util import PersistencyUtil from aminer.util import TimeTriggeredComponentInterface class MatchValueAverageChangeDetector(AtomHandlerInterface, TimeTriggeredComponentInterface): """This detector calculates the average of a given list of values to monitor and reports if the average of the latest diverges significantly from the values observed before.""" def __init__(self, aminerConfig, anomalyEventHandlers, timestampPath, analyzePathList, minBinElements, minBinTime, syncBinsFlag=True, debugMode=False, peristenceId='Default'): """Initialize the detector. This will also trigger reading or creation of persistence storage location. @param timestampPath if not None, use this path value for timestamp based bins. @param analyzePathList list of match pathes to analyze in this detector. @param minBinElements evaluate the latest bin only after at least that number of elements was added to it. @param minBinTime evaluate the latest bin only when the first element is received after minBinTime has elapsed. @param syncBinsFlag if true the bins of all analyzed path values have to be filled enough to trigger analysis. 
@param debugMode if true, generate an analysis report even when average of last bin was within expected range.""" self.anomalyEventHandlers = anomalyEventHandlers self.timestampPath = timestampPath self.minBinElements = minBinElements self.minBinTime = minBinTime self.syncBinsFlag = syncBinsFlag self.debugMode = debugMode self.nextPersistTime = None PersistencyUtil.addPersistableComponent(self) self.persistenceFileName = AMinerConfig.buildPersistenceFileName(aminerConfig, \ 'MatchValueAverageChangeDetector', peristenceId) persistenceData = PersistencyUtil.loadJson(self.persistenceFileName) if persistenceData is None: self.statData = [] for path in analyzePathList: self.statData.append((path, [],)) # else: # self.knownPathSet = set(persistenceData) def receiveAtom(self, logAtom): """Sends summary to all event handlers.""" parserMatch = logAtom.parserMatch valueDict = parserMatch.getMatchDictionary() timestampValue = logAtom.getTimestamp() if self.timestampPath is not None: matchValue = valueDict.get(self.timestampPath) if matchValue is None: return timestampValue = matchValue.matchObject[1] analysisSummary = '' if self.syncBinsFlag: readyForAnalysisFlag = True for (path, statData) in self.statData: match = valueDict.get(path, None) if match is None: readyForAnalysisFlag = (readyForAnalysisFlag and self.update(statData, \ timestampValue, None)) else: readyForAnalysisFlag = (readyForAnalysisFlag and self.update(statData, \ timestampValue, match.matchObject)) if readyForAnalysisFlag: for (path, statData) in self.statData: analysisData = self.analyze(statData) if analysisData is not None: analysisSummary += '"%s": %s' % (path, analysisData) if self.nextPersistTime is None: self.nextPersistTime = time.time()+600 else: raise Exception('FIXME: not implemented') if analysisSummary: for listener in self.anomalyEventHandlers: listener.receiveEvent('Analysis.%s' % self.__class__.__name__, \ 'Statistical data report\n%s' % analysisSummary, [logAtom.rawData], match, \ self) def getTimeTriggerClass(self): """Get the trigger class this component should be registered for. This trigger is used only for persistency, so real-time triggering is needed.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """Check current ruleset should be persisted""" if self.nextPersistTime is None: return 600 delta = self.nextPersistTime-triggerTime if delta < 0: # PersistencyUtil.storeJson(self.persistenceFileName, list(self.knownPathSet)) self.nextPersistTime = None delta = 600 return delta def doPersist(self): """Immediately write persistence data to storage.""" # PersistencyUtil.storeJson(self.persistenceFileName, list(self.knownPathSet)) self.nextPersistTime = None def update(self, statData, timestampValue, value): """Update the collected statistics data. @param value if value not None, check only conditions if current bin is full enough. 
@return true if the bin is full enough to perform an analysis.""" if value is not None: if not statData: # Append timestamp, k-value, old-bin (n, sum, sum2, avg, variance), # current-bin (n, sum, sum2) statData.append(timestampValue) statData.append(value) statData.append(None) statData.append((1, 0.0, 0.0,)) else: delta = value-statData[1] binValues = statData[3] statData[3] = (binValues[0]+1, binValues[1]+delta, binValues[2]+delta*delta) if not statData: return False if statData[3][0] < self.minBinElements: return False if self.timestampPath is not None: return timestampValue-statData[0] >= self.minBinTime return True def analyze(self, statData): """Perform the analysis and progress from the last bin to the next one. @return None when statistical data was as expected and debugging is disabled.""" currentBin = statData[3] currentAverage = currentBin[1]/currentBin[0] currentVariance = (currentBin[2]-(currentBin[1]*currentBin[1])/currentBin[0])/(currentBin[0]-1) # Append timestamp, k-value, old-bin (n, sum, sum2, avg, variance), # current-bin (n, sum, sum2) oldBin = statData[2] if oldBin is None: statData[2] = (currentBin[0], currentBin[1], currentBin[2], currentAverage, currentVariance,) statData[3] = (0, 0.0, 0.0) if self.debugMode: return 'Initial: n = %d, avg = %s, var = %s\n' % (currentBin[0], \ currentAverage+statData[1], currentVariance) else: totalN = oldBin[0]+currentBin[0] totalSum = oldBin[1]+currentBin[1] totalSum2 = oldBin[2]+currentBin[2] statData[2] = (totalN, totalSum, totalSum2, totalSum/totalN, \ (totalSum2-(totalSum*totalSum)/totalN)/(totalN-1)) statData[3] = (0, 0.0, 0.0) if (currentVariance > 2*oldBin[4]) or (abs(currentAverage-oldBin[3]) > oldBin[4]) \ or self.debugMode: return 'Change: new: n = %d, avg = %s, var = %s; old: n = %d, avg = %s, var = %s\n' % \ (currentBin[0], currentAverage+statData[1], currentVariance, oldBin[0], \ oldBin[3]+statData[1], oldBin[4]) return None deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/Rules.py0000600000000000000000000004142413352720675024677 0ustar rootroot"""This package contains various classes to build check rulesets. The ruleset also supports parallel rule evaluation, e.g. the two rules "A and B and C" and "A and B and D" will only peform the checks for A and B once, then performs check C and D and trigger a match action.""" import datetime import sys from aminer.util import LogarithmicBackoffHistory from aminer.util import ObjectHistory from aminer.analysis.AtomFilters import SubhandlerFilter class MatchAction(object): """This is the interface of all match actions.""" def matchAction(self, logAtom): """This method is invoked if a rule rule has matched. 
@param logAtom the LogAtom matching the rules.""" raise Exception('Interface called') class EventGenerationMatchAction(MatchAction): """This generic match action forwards information about a rule match on parsed data to a list of event handlers.""" def __init__(self, eventType, eventMessage, eventHandlers): self.eventType = eventType self.eventMessage = eventMessage self.eventHandlers = eventHandlers def matchAction(self, logAtom): for handler in self.eventHandlers: handler.receiveEvent( self.eventType, self.eventMessage, [logAtom.rawData], logAtom, self) class AtomFilterMatchAction(MatchAction, SubhandlerFilter): """This generic match rule forwards all rule matches to a list of AtomHandlerInterface instaces using the analysis.AtomFilters.SubhandlerFilter.""" def __init__(self, subhandlerList, stopWhenHandledFlag=False): SubhandlerFilter.__init__(self, subhandlerList, stopWhenHandledFlag) def matchAction(self, logAtom): self.receiveAtom(logAtom) class MatchRule(object): """This is the interface of all match rules.""" def match(self, logAtom): """Check if this rule matches. On match an optional matchAction could be triggered.""" raise Exception('Interface called on %s' % self) class AndMatchRule(MatchRule): """This class provides a rule to match all subRules (logical and)""" def __init__(self, subRules, matchAction=None): """Create the rule. @param matchAction if None, no action is performed.""" self.subRules = subRules self.matchAction = matchAction def match(self, logAtom): """Check if this rule matches. Rule evaluation will stop when the first match fails. If a matchAction is attached to this rule, it will be invoked at the end of all checks. @return True when all subrules matched.""" for rule in self.subRules: if not rule.match(logAtom): return False if self.matchAction != None: self.matchAction.matchAction(logAtom) return True def __str__(self): result = '' preamble = '' for matchElement in self.subRules: result += '%s(%s)' % (preamble, matchElement) preamble = ' and ' return result class OrMatchRule(MatchRule): """This class provides a rule to match any subRules (logical or)""" def __init__(self, subRules, matchAction=None): """Create the rule. @param matchAction if None, no action is performed.""" self.subRules = subRules self.matchAction = matchAction def match(self, logAtom): """Check if this rule matches. Rule evaluation will stop when the first match succeeds. If a matchAction is attached to this rule, it will be invoked after the first match. @return True when any subrule matched.""" for rule in self.subRules: if rule.match(logAtom): if self.matchAction != None: self.matchAction.matchAction(logAtom) return True return False def __str__(self): result = '' preamble = '' for matchElement in self.subRules: result += '%s(%s)' % (preamble, matchElement) preamble = ' or ' return result class ParallelMatchRule(MatchRule): """This class is a rule testing all the subrules in parallel. From the behaviour it is similar to the OrMatchRule, returning true if any subrule matches. The difference is that matching will not stop after the first positive match. This does only make sense when all subrules have match actions associated.""" def __init__(self, subRules, matchAction=None): """Create the rule. @param matchAction if None, no action is performed.""" self.subRules = subRules self.matchAction = matchAction def match(self, logAtom): """Check if any of the subrules rule matches. The matching procedure will not stop after the first positive match. 
If a matchAction is attached to this rule, it will be invoked at the end of all checks. @return True when any subrule matched.""" matchFlag = False for rule in self.subRules: if rule.match(logAtom): matchFlag = True if matchFlag and (self.matchAction != None): self.matchAction.matchAction(logAtom) return matchFlag def __str__(self): result = '' preamble = '' for matchElement in self.subRules: result += '%s(%s)' % (preamble, matchElement) preamble = ' por ' return result class ValueDependentDelegatedMatchRule(MatchRule): """This class is a rule delegating rule checking to subrules depending on values found within the parserMatch. The result of this rule is the result of the selected delegation rule.""" def __init__( self, valuePathList, ruleLookupDict, defaultRule=None, matchAction=None): """Create the rule. @param list with value pathes that are used to extract the lookup keys for ruleLookupDict. If value lookup fails, None will be used for lookup. @param ruleLookupDict dicitionary with tuple containing values for valuePathList as key and target rule as value. @param defaultRule when not none, this rule will be executed as default. Otherwise when rule lookup failed, False will be returned unconditionally. @param matchAction if None, no action is performed.""" self.valuePathList = valuePathList self.ruleLookupDict = ruleLookupDict self.defaultRule = defaultRule self.matchAction = matchAction def match(self, logAtom): """Try to locate a rule for delegation or use the default rule. @return True when selected delegation rule matched.""" matchDict = logAtom.parserMatch.getMatchDictionary() valueList = [] for path in self.valuePathList: valueElement = matchDict.get(path, None) if valueElement is None: valueList.append(None) else: valueList.append(valueElement.matchObject) rule = self.ruleLookupDict.get(tuple(valueList), self.defaultRule) if rule is None: return False if rule.match(logAtom): if self.matchAction != None: self.matchAction.matchAction(logAtom) return True return False def __str__(self): result = 'ValueDependentDelegatedMatchRule' return result class NegationMatchRule(MatchRule): """Match elements of this class return true when the subrule did not match.""" def __init__(self, subRule, matchAction=None): self.subRule = subRule self.matchAction = matchAction def match(self, logAtom): if self.subRule.match(logAtom): return False if self.matchAction != None: self.matchAction.matchAction(logAtom) return True def __str__(self): return 'not %s' % self.subRule class PathExistsMatchRule(MatchRule): """Match elements of this class return true when the given path was found in the parsed match data.""" def __init__(self, path, matchAction=None): self.path = path self.matchAction = matchAction def match(self, logAtom): if self.path in logAtom.parserMatch.getMatchDictionary(): if self.matchAction != None: self.matchAction.matchAction(logAtom) return True return False def __str__(self): return 'hasPath(%s)' % self.path class ValueMatchRule(MatchRule): """Match elements of this class return true when the given path exists and has exactly the given parsed value.""" def __init__(self, path, value, matchAction=None): self.path = path self.value = value self.matchAction = matchAction def match(self, logAtom): testValue = logAtom.parserMatch.getMatchDictionary().get(self.path, None) if (testValue != None) and (testValue.matchObject == self.value): if self.matchAction != None: self.matchAction.matchAction(logAtom) return True return False def __str__(self): return 'value(%s)==%s' % (self.path, self.value) 
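# Editorial sketch, not part of the upstream module: the MatchRule subclasses in
# this file are meant to be composed into rule trees, optionally firing a
# MatchAction on a successful match. The helper below illustrates one plausible
# composition; the parser paths '/model/status' and '/model/user', the value
# b'root' and the empty event handler list are assumed placeholders, not paths
# or values defined anywhere in this package.
def _buildExampleRule():
  """Build an AndMatchRule that triggers an event when both sub-rules match."""
  exampleAction = EventGenerationMatchAction(
      'Analysis.ExampleRule', 'Monitored value seen on monitored path', [])
  return AndMatchRule(
      [PathExistsMatchRule('/model/status'),
       ValueMatchRule('/model/user', b'root')],
      exampleAction)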
class ValueListMatchRule(MatchRule): """Match elements of this class return true when the given path exists and has exactly one of the values included in the value list.""" def __init__(self, path, valueList, matchAction=None): self.path = path self.valueList = valueList self.matchAction = matchAction def match(self, logAtom): testValue = logAtom.parserMatch.getMatchDictionary().get(self.path, None) if (testValue != None) and (testValue.matchObject in self.valueList): if self.matchAction != None: self.matchAction.matchAction(logAtom) return True return False def __str__(self): return 'value(%s) in %s' % (self.path, ' '.join(self.valueList)) class ValueRangeMatchRule(MatchRule): """Match elements of this class return true when the given path exists and the value is included in [lower, upper] range.""" def __init__(self, path, lowerLimit, upperLimit, matchAction=None): self.path = path self.lowerLimit = lowerLimit self.upperLimit = upperLimit self.matchAction = matchAction def match(self, logAtom): testValue = logAtom.parserMatch.getMatchDictionary().get(self.path, None) if testValue is None: return False testValue = testValue.matchObject if (testValue >= self.lowerLimit) and (testValue <= self.upperLimit): if self.matchAction != None: self.matchAction.matchAction(logAtom) return True return False def __str__(self): return 'value(%s) inrange (%s, %s)' % ( self.path, self.lowerLimit, self.upperLimit) class StringRegexMatchRule(MatchRule): """Match elements of this class return true when the given path exists and the string representation of the value matches the given compiled regular expression.""" def __init__(self, path, matchRegex, matchAction=None): self.path = path self.matchRegex = matchRegex self.matchAction = matchAction def match(self, logAtom): # Use the class object as marker for nonexisting entries testValue = logAtom.parserMatch.getMatchDictionary().get(self.path, None) if ((testValue is None) or (self.matchRegex.match(testValue.matchString) is None)): return False if self.matchAction != None: self.matchAction.matchAction(logAtom) return True def __str__(self): return 'string(%s) =regex= %s' % (self.path, self.matchRegex.pattern) class ModuloTimeMatchRule(MatchRule): """Match elements of this class return true when the given path exists, denotes a datetime object and the seconds since 1970 from that date modulo the given value are included in [lower, upper] range.""" def __init__(self, path, secondsModulo, lowerLimit, upperLimit, matchAction=None): """@param path the path to the datetime object to use to evaluate the modulo time rules on. 
When None, the default timestamp associated with the match is used.""" self.path = path self.secondsModulo = secondsModulo self.lowerLimit = lowerLimit self.upperLimit = upperLimit self.matchAction = matchAction def match(self, logAtom): testValue = None if self.path is None: testValue = logAtom.getTimestamp() else: timeMatch = logAtom.parserMatch.getMatchDictionary().get(self.path, None) if ((timeMatch is None) or not isinstance(timeMatch.matchObject, tuple) or not isinstance(timeMatch.matchObject[0], datetime.datetime)): return False testValue = timeMatch.matchObject[1] if testValue is None: return False testValue %= self.secondsModulo if (testValue >= self.lowerLimit) and (testValue <= self.upperLimit): if self.matchAction != None: self.matchAction.matchAction(logAtom) return True return False class ValueDependentModuloTimeMatchRule(MatchRule): """Match elements of this class return true when the given path exists, denotes a datetime object and the seconds since 1970 from that date modulo the given value are included in a [lower, upper] range selected by values from the match.""" def __init__( self, path, secondsModulo, valuePathList, limitLookupDict, defaultLimit=None, matchAction=None): """@param path the path to the datetime object to use to evaluate the modulo time rules on. When None, the default timestamp associated with the match is used. @param defaultLimit use this default limit when limit lookup failed. Without a default limit, a failed lookup will cause the rule not to match.""" self.path = path self.secondsModulo = secondsModulo self.valuePathList = valuePathList self.limitLookupDict = limitLookupDict self.defaultLimit = defaultLimit self.matchAction = matchAction def match(self, logAtom): matchDict = logAtom.parserMatch.getMatchDictionary() valueList = [] for path in self.valuePathList: valueElement = matchDict.get(path, None) if valueElement is None: valueList.append(None) else: valueList.append(valueElement.matchObject) limits = self.limitLookupDict.get(tuple(valueList), self.defaultLimit) if limits is None: return False testValue = None if self.path is None: testValue = logAtom.getTimestamp() else: timeMatch = logAtom.parserMatch.getMatchDictionary().get(self.path, None) if ((timeMatch is None) or not isinstance(timeMatch.matchObject, tuple) or not isinstance(timeMatch.matchObject[0], datetime.datetime)): return False testValue = timeMatch.matchObject[1] if testValue is None: return False testValue %= self.secondsModulo if (testValue >= limits[0]) and (testValue <= limits[1]): if self.matchAction != None: self.matchAction.matchAction(logAtom) return True return False class IPv4InRFC1918MatchRule(MatchRule): """Match elements of this class return true when the given path was found, contains a valid IPv4 address from the RFC1918 private IP ranges. 
This could also be done by distinct range match elements, but as this kind of matching is common, have an own element for it.""" def __init__(self, path, matchAction=None): self.path = path self.matchAction = matchAction def match(self, logAtom): matchElement = logAtom.parserMatch.getMatchDictionary().get(self.path, None) if (matchElement is None) or not isinstance(matchElement.matchObject, int): return False value = matchElement.matchObject if (((value&0xff000000) == 0xa000000) or ((value&0xfff00000) == 0xac100000) or ((value&0xffff0000) == 0xc0a80000)): if self.matchAction != None: self.matchAction.matchAction(logAtom) return True return False def __str__(self): return 'hasPath(%s)' % self.path class DebugMatchRule(MatchRule): """This rule can be inserted into a normal ruleset just to see when a match attempt is made. It just prints out the current logAtom that is evaluated. The match action is always invoked when defined, no matter which match result is returned.""" def __init__(self, debugMatchResult=False, matchAction=None): self.debugMatchResult = debugMatchResult self.matchAction = matchAction def match(self, logAtom): print('Rules.DebugMatchRule: triggered while ' \ 'handling "%s"' % repr(logAtom.parserMatch.matchElement.matchString), file=sys.stderr) if self.matchAction != None: self.matchAction.matchAction(logAtom) return self.debugMatchResult def __str__(self): return '%s' % self.debugMatchResult class DebugHistoryMatchRule(MatchRule): """This rule can be inserted into a normal ruleset just to see when a match attempt is made. It just adds the evaluated logAtom to a ObjectHistory.""" def __init__( self, objectHistory=None, debugMatchResult=False, matchAction=None): """Create a DebugHistoryMatchRule object. @param objectHistory use this ObjectHistory to collect the LogAtoms. When None, a default LogarithmicBackoffHistory for 10 items.""" if objectHistory is None: objectHistory = LogarithmicBackoffHistory(10) elif not isinstance(objectHistory, ObjectHistory): raise Exception('objectHistory is not an instance of ObjectHistory') self.objectHistory = objectHistory self.debugMatchResult = debugMatchResult self.matchAction = matchAction def match(self, logAtom): self.objectHistory.addObject(logAtom) if self.matchAction != None: self.matchAction.matchAction(logAtom) return self.debugMatchResult def getHistory(self): """Get the history object from this debug rule.""" return self.objectHistory deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/MissingMatchPathValueDetector.py0000600000000000000000000002331613353143656031476 0ustar rootroot"""This module provides the MissingMatchPathValueDetector to generate events when expected values were not seen for an extended period of time.""" import time from aminer import AMinerConfig from aminer.AnalysisChild import AnalysisContext from aminer.events import EventSourceInterface from aminer.input import AtomHandlerInterface from aminer.util import PersistencyUtil from aminer.util import TimeTriggeredComponentInterface class MissingMatchPathValueDetector( AtomHandlerInterface, TimeTriggeredComponentInterface, EventSourceInterface): """This class creates events when an expected value is not seen within a given timespan, e.g. because the service was deactivated or logging disabled unexpectedly. This is complementary to the function provided by NewMatchPathValueDetector. For each unique value extracted by targetPath, a tracking record is added to expectedValuesDict. 
It stores three numbers: the timestamp the extracted value was last seen, the maximum allowed gap between observations and the next alerting time when currently in error state. When in normal (alerting) state, the value is zero.""" def __init__( self, aminerConfig, targetPath, anomalyEventHandlers, peristenceId='Default', autoIncludeFlag=False, defaultInterval=3600, realertInterval=86400): """Initialize the detector. This will also trigger reading or creation of persistence storage location. @param targetPath to extract a source identification value from each logatom.""" self.targetPath = targetPath self.anomalyEventHandlers = anomalyEventHandlers self.autoIncludeFlag = autoIncludeFlag self.defaultInterval = defaultInterval self.realertInterval = realertInterval # This timestamps is compared with timestamp values from log atoms # for activation of alerting logic. The first timestamp from logs # above this value will trigger alerting. self.nextCheckTimestamp = 0 self.lastSeenTimestamp = 0 self.nextPersistTime = None PersistencyUtil.addPersistableComponent(self) self.persistenceFileName = AMinerConfig.buildPersistenceFileName( aminerConfig, self.__class__.__name__, peristenceId) persistenceData = PersistencyUtil.loadJson(self.persistenceFileName) if persistenceData is None: self.expectedValuesDict = {} else: self.expectedValuesDict = persistenceData def receiveAtom(self, logAtom): """Receive a log atom from a source. @param atomData binary raw atom data @return True if this handler was really able to handle and process the atom. Depending on this information, the caller may decide if it makes sense passing the atom also to other handlers or to retry later. This behaviour has to be documented at each source implementation sending LogAtoms.""" value = self.getChannelKey(logAtom) if value is None: return False timeStamp = logAtom.getTimestamp() detectorInfo = self.expectedValuesDict.get(value, None) if detectorInfo != None: # Just update the last seen value and switch from non-reporting # error state to normal state. detectorInfo[0] = timeStamp if detectorInfo[2] != 0: detectorInfo[2] = 0 # Delta of this detector might be lower than the default maximum # recheck time. self.nextCheckTimestamp = min( self.nextCheckTimestamp, timeStamp+detectorInfo[1]) elif self.autoIncludeFlag: self.expectedValuesDict[value] = [timeStamp, self.defaultInterval, 0] self.nextCheckTimestamp = min(self.nextCheckTimestamp, timeStamp+self.defaultInterval) # Always enforce persistency syncs from time to time, the timestamps # in the records change even when no new hosts are added. if self.nextPersistTime is None: self.nextPersistTime = time.time()+600 self.checkTimeouts(timeStamp, logAtom) return True def getChannelKey(self, logAtom): """Get the key identifying the channel this logAtom is coming from.""" matchElement = logAtom.parserMatch.getMatchDictionary().get( self.targetPath, None) if matchElement is None: return None return matchElement.matchObject def checkTimeouts(self, timeStamp, logAtom): """Check if there was any timeout on a channel, thus triggering event dispatching.""" self.lastSeenTimestamp = max(self.lastSeenTimestamp, timeStamp) if self.lastSeenTimestamp > self.nextCheckTimestamp: missingValueList = [] # Start with a large recheck interval. It will be lowered if any # of the expectation intervals is below that. 
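      # 86400 seconds = 24 hours: upper bound for the next recheck; the loop
      # below lowers it whenever a per-value expectation interval comes due sooner.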
self.nextCheckTimestamp = self.lastSeenTimestamp+86400 for value, detectorInfo in self.expectedValuesDict.items(): valueOverdueTime = self.lastSeenTimestamp-detectorInfo[0]-detectorInfo[1] if detectorInfo[2] != 0: nextCheckDelta = detectorInfo[2]-self.lastSeenTimestamp if nextCheckDelta > 0: # Already alerted but not ready for realerting yet. self.nextCheckTimestamp = min( self.nextCheckTimestamp, detectorInfo[2]) continue else: # No alerting yet, see if alerting is required. if valueOverdueTime < 0: self.nextCheckTimestamp = min( self.nextCheckTimestamp, self.lastSeenTimestamp-valueOverdueTime) continue missingValueList.append([value, valueOverdueTime, detectorInfo[1]]) # Set the next alerting time. detectorInfo[2] = self.lastSeenTimestamp+self.realertInterval if missingValueList: messagePart = '' for value, overdueTime, interval in missingValueList: messagePart += '\n %s overdue %ss (interval %s)' % (repr(value), overdueTime, interval) for listener in self.anomalyEventHandlers: listener.receiveEvent( 'Analysis.%s' % self.__class__.__name__, 'Interval too large between values for path %s:%s ' % (self.targetPath, messagePart), [logAtom.rawData], missingValueList, self) return True def setCheckValue(self, value, interval): """Add or overwrite a value to be monitored by the detector.""" self.expectedValuesDict[value] = [self.lastSeenTimestamp, interval, 0] self.nextCheckTimestamp = 0 # Explicitely trigger a persistency sync to avoid staying in unsynced # state too long when no new received atoms trigger it. But do # not sync immediately, that would make bulk calls to this method # quite inefficient. if self.nextPersistTime is None: self.nextPersistTime = time.time()+600 def removeCheckValue(self, value): """Remove checks for given value.""" del self.expectedValuesDict[value] def getTimeTriggerClass(self): """Get the trigger class this component can be registered for. This detector only needs persisteny triggers in real time.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """Check current ruleset should be persisted""" if self.nextPersistTime is None: return 600 delta = self.nextPersistTime-triggerTime if delta <= 0: PersistencyUtil.storeJson(self.persistenceFileName, self.expectedValuesDict) self.nextPersistTime = None delta = 600 return delta def doPersist(self): """Immediately write persistence data to storage.""" PersistencyUtil.storeJson(self.persistenceFileName, self.expectedValuesDict) self.nextPersistTime = None def whitelistEvent( self, eventType, sortedLogLines, eventData, whitelistingData): """Whitelist an event generated by this source using the information emitted when generating the event. @return a message with information about whitelisting @throws Exception when whitelisting of this special event using given whitelistingData was not possible.""" if eventType != 'Analysis.%s' % self.__class__.__name__: raise Exception('Event not from this source') if not isinstance(whitelistingData, int): raise Exception('Whitelisting data has to integer with ' \ 'new interval, -1 to reset to defaults, other negative ' \ 'value to remove the entry') newInterval = whitelistingData if newInterval == -1: newInterval = self.defaultInterval for keyName, in eventData: if newInterval < 0: self.removeCheckValue(keyName) else: self.setCheckValue(keyName, newInterval) return 'Updated %d entries' % len(eventData) class MissingMatchPathListValueDetector(MissingMatchPathValueDetector): """This detector works similar to the MissingMatchPathValueDetector. 
It only can lookup values from a list of pathes until one path really exists. It then uses this value as key to detect logAtoms belonging to the same data stream. This is useful when e.g. due to different log formats, the hostname, servicename or any other relevant channel identifier has alternative pathes.""" def __init__( self, aminerConfig, targetPathList, anomalyEventHandlers, peristenceId='Default', autoIncludeFlag=False, defaultInterval=3600, realertInterval=86400): """Initialize the detector. This will also trigger reading or creation of persistence storage location. @param targetPath to extract a source identification value from each logatom.""" super(MissingMatchPathListValueDetector, self).__init__( aminerConfig, None, anomalyEventHandlers, peristenceId, autoIncludeFlag, defaultInterval, realertInterval) self.targetPathList = targetPathList def getChannelKey(self, logAtom): """Get the key identifying the channel this logAtom is coming from.""" for targetPath in self.targetPathList: matchElement = logAtom.parserMatch.getMatchDictionary().get( targetPath, None) if matchElement is None: continue return matchElement.matchObject return None deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/TimeCorrelationDetector.py0000600000000000000000000002072313346661756030405 0ustar rootroot"""This module defines a detector for time correlation between atoms.""" from datetime import datetime import random import time from aminer import AMinerConfig from aminer.AnalysisChild import AnalysisContext from aminer.analysis import Rules from aminer.input import AtomHandlerInterface from aminer.util import getLogInt from aminer.util import PersistencyUtil from aminer.util import TimeTriggeredComponentInterface class TimeCorrelationDetector(AtomHandlerInterface, TimeTriggeredComponentInterface): """This class tries to find time correlation patterns between different log atoms. When a possible correlation rule is detected, it creates an event including the rules. This is useful to implement checks as depicted in http://dx.doi.org/10.1016/j.cose.2014.09.006.""" def __init__(self, aminerConfig, parallelCheckCount, correlationTestCount, \ maxFailCount, anomalyEventHandlers, peristenceId='Default'): """Initialize the detector. This will also trigger reading or creation of persistence storage location. @param parallelCheckCount number of rule detection checks to run in parallel. @param correlationTestCount number of tests to perform on a rule under test. 
@param maxFailCount maximal number of test failures so that rule is still eligible for reporting.""" self.lastTimestamp = 0.0 self.parallelCheckCount = parallelCheckCount self.correlationTestCount = correlationTestCount self.maxFailCount = maxFailCount self.anomalyEventHandlers = anomalyEventHandlers self.maxRuleAttributes = 5 self.lastUnhandledMatch = None self.nextPersistTime = None self.totalRecords = 0 PersistencyUtil.addPersistableComponent(self) self.persistenceFileName = AMinerConfig.buildPersistenceFileName( aminerConfig, 'TimeCorrelationDetector', peristenceId) persistenceData = PersistencyUtil.loadJson(self.persistenceFileName) if persistenceData is None: self.featureList = [] self.eventCountTable = [0]*parallelCheckCount*parallelCheckCount*2 self.eventDeltaTable = [0]*parallelCheckCount*parallelCheckCount*2 # else: # self.knownPathSet = set(persistenceData) def receiveAtom(self, logAtom): timestamp = logAtom.getTimestamp() if timestamp is None: timestamp = time.time() if timestamp < self.lastTimestamp: for listener in self.anomalyEventHandlers: listener.receiveEvent('Analysis.%s' % self.__class__.__name__, \ 'Logdata not sorted: last %s, current %s' % (self.lastTimestamp, timestamp), \ [logAtom.rawData], logAtom, self) return self.lastTimestamp = timestamp parserMatch = logAtom.parserMatch self.totalRecords += 1 featuresFoundList = [] for feature in self.featureList: if feature.rule.match(parserMatch): feature.triggerCount += 1 self.updateTablesForFeature(feature, timestamp) featuresFoundList.append(feature) if len(self.featureList) < self.parallelCheckCount: if (random.randint(0, 1) != 0) and (self.lastUnhandledMatch is not None): parserMatch = self.lastUnhandledMatch newRule = self.createRandomRule(parserMatch) newFeature = CorrelationFeature(newRule, len(self.featureList), timestamp) self.featureList.append(newFeature) newFeature.triggerCount = 1 self.updateTablesForFeature(newFeature, timestamp) featuresFoundList.append(newFeature) for feature in featuresFoundList: feature.lastTriggerTime = timestamp if not featuresFoundList: self.lastUnhandledMatch = parserMatch elif self.nextPersistTime is None: self.nextPersistTime = time.time()+600 if (self.totalRecords%0x10000) == 0: for listener in self.anomalyEventHandlers: listener.receiveEvent('Analysis.%s' % self.__class__.__name__, \ 'Correlation report', [self.analysisStatusToString()], \ parserMatch, self) self.resetStatistics() def getTimeTriggerClass(self): """Get the trigger class this component should be registered for. 
This trigger is used only for persistency, so real-time triggering is needed.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """Check current ruleset should be persisted""" if self.nextPersistTime is None: return 600 delta = self.nextPersistTime-triggerTime if delta < 0: # PersistencyUtil.storeJson(self.persistenceFileName, list(self.knownPathSet)) self.nextPersistTime = None delta = 600 return delta def doPersist(self): """Immediately write persistence data to storage.""" # PersistencyUtil.storeJson(self.persistenceFileName, list(self.knownPathSet)) self.nextPersistTime = None def createRandomRule(self, parserMatch): """Create a random existing path rule or value match rule.""" subRules = [] allKeys = parserMatch.getMatchDictionary().keys() attributeCount = getLogInt(self.maxRuleAttributes)+1 while attributeCount > 0: keyPos = random.randint(0, len(allKeys)-1) keyName = allKeys[keyPos] allKeys = allKeys[:keyPos]+allKeys[keyPos+1:] keyValue = parserMatch.getMatchDictionary().get(keyName).matchObject # Not much sense handling parsed date values in this implementation, # so just ignore this attribute. if (isinstance(keyValue, tuple)) and (isinstance(keyValue[0], datetime)): if not allKeys: break continue attributeCount -= 1 ruleType = random.randint(0, 1) if ruleType == 0: subRules.append(Rules.PathExistsMatchRule(keyName)) elif ruleType == 1: subRules.append(Rules.ValueMatchRule(keyName, keyValue)) else: raise Exception('Invalid rule type') if not allKeys: break if len(subRules) > 1: return Rules.AndMatchRule(subRules) return subRules[0] def updateTablesForFeature(self, targetFeature, timestamp): """Assume that this event was the effect of a previous cause-related event. Loop over all cause-related features (rows) to search for matches.""" featureTablePos = (targetFeature.index << 1) for feature in self.featureList: delta = timestamp-feature.lastTriggerTime if delta <= 10.0: self.eventCountTable[featureTablePos] += 1 self.eventDeltaTable[featureTablePos] += int(delta*1000) featureTablePos += (self.parallelCheckCount << 1) featureTablePos = ((targetFeature.index*self.parallelCheckCount) << 1)+1 for feature in self.featureList: delta = timestamp-feature.lastTriggerTime if delta <= 10.0: self.eventCountTable[featureTablePos] += 1 self.eventDeltaTable[featureTablePos] -= int(delta*1000) featureTablePos += 2 def analysisStatusToString(self): """Get a string representation of all features.""" result = '' for feature in self.featureList: triggerCount = feature.triggerCount result += '%s (%d) e = %d:' % (feature.rule, feature.index, triggerCount) statPos = (self.parallelCheckCount*feature.index) << 1 for featurePos in range(0, len(self.featureList)): eventCount = self.eventCountTable[statPos] ratio = '-' if triggerCount != 0: ratio = '%.2e' % (float(eventCount)/triggerCount) delta = '-' if eventCount != 0: delta = '%.2e' % (float(self.eventDeltaTable[statPos])*0.001/eventCount) result += '\n %d: {c = %#6d r = %s dt = %s' % (featurePos, eventCount, ratio, delta) statPos += 1 eventCount = self.eventCountTable[statPos] ratio = '-' if triggerCount != 0: ratio = '%.2e' % (float(eventCount)/triggerCount) delta = '-' if eventCount != 0: delta = '%.2e' % (float(self.eventDeltaTable[statPos])*0.001/eventCount) result += ' c = %#6d r = %s dt = %s}' % (eventCount, ratio, delta) statPos += 1 result += '\n' return result def resetStatistics(self): """Reset all features.""" for feature in self.featureList: feature.creationTime = 0 feature.lastTriggerTime = 0 
feature.triggerCount = 0 self.eventCountTable = [0]*self.parallelCheckCount*self.parallelCheckCount*2 self.eventDeltaTable = [0]*self.parallelCheckCount*self.parallelCheckCount*2 class CorrelationFeature: """This class defines a correlation feature.""" def __init__(self, rule, index, creationTime): self.rule = rule self.index = index self.creationTime = creationTime self.lastTriggerTime = 0.0 self.triggerCount = 0 deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/HistogramAnalysis.py0000600000000000000000000004373713354625143027253 0ustar rootroot"""This component performs a histogram analysis on one or more input properties. The properties are parsed values denoted by their parsing path. Those values are then handed over to the selected "binning function", that calculates the histogram bin. * Binning: Binning can be done using one of the predefined binning functions or by creating own subclasses from "HistogramAnalysis.BinDefinition". * LinearNumericBinDefinition: Binning function working on numeric values and sorting them into bins of same size. * ModuloTimeBinDefinition: Binning function working on parsed datetime values but applying a modulo function to them. This is useful for analysis of periodic activities. * Example: The following example creates a HistogramAnalysis using only the property "/model/line/time", binned on per-hour basis and sending a report every week: from aminer.analysis import HistogramAnalysis # Use a time-modulo binning function moduloTimeBinDefinition=HistogramAnalysis.ModuloTimeBinDefinition( 3600*24, # Modulo values in seconds (1 day) 3600, # Division factor to get down to reporting unit (1h) 0, # Start of lowest bin 1, # Size of bin in reporting units 24, # Number of bins False) # Disable outlier bins, not possible with time modulo histogramAnalysis=HistogramAnalysis.HistogramAnalysis( aminerConfig, [('/model/line/time', moduloTimeBinDefinition)], 3600*24*7, # Reporting interval (weekly) reportEventHandlers, # Send report to those handlers resetAfterReportFlag=True) # Zero counters after sending of report # Send the appropriate input feed to the component atomFilter.addHandler(histogramAnalysis) """ import time from aminer import AMinerConfig from aminer.AnalysisChild import AnalysisContext from aminer.input import AtomHandlerInterface from aminer.util import PersistencyUtil from aminer.util import TimeTriggeredComponentInterface binomialTest = None try: from scipy import stats binomialTest = stats.binom_test except: pass class BinDefinition(object): """This class defines the bins of the histogram.""" def __init__(self): raise Exception('Not implemented') def hasOutlierBins(self): """Report if this binning works with outlier bins, that are bins for all values outside the normal binning range. If not, outliers are discarded. When true, the outlier bins are the first and last bin.""" raise Exception('Not implemented') def getBinNames(self): """Get the names of the bins for reporting, including the outlier bins if any.""" raise Exception('Not implemented') def getBin(self, value): """Get the number of the bin this value should belong to. @return the bin number or None if the value is an outlier and outlier bins were not requested. With outliers, bin 0 is the bin with outliers below limit, first normal bin is at index 1.""" raise Exception('Not implemented') def getBinPValue(self, binPos, totalValues, binValues): """Calculate a p-Value, how likely the observed number of elements in this bin is. 
@return the value or None when not applicable.""" return None class LinearNumericBinDefinition(BinDefinition): """This class defines the linear numeric bins.""" def __init__(self, lowerLimit, binSize, binCount, outlierBinsFlag=False): self.lowerLimit = lowerLimit self.binSize = binSize self.binCount = binCount self.outlierBinsFlag = outlierBinsFlag self.binNames = None self.expectedBinRatio = 1.0/float(binCount) def hasOutlierBins(self): """Report if this binning works with outlier bins, that are bins for all values outside the normal binning range. If not, outliers are discarded. When true, the outlier bins are the first and last bin.""" return self.outlierBinsFlag def getBinNames(self): """Get the names of the bins for reporting, including the outlier bins if any.""" # Cache the names here so that multiple histograms using same # BinDefinition do not use separate copies of the strings. if self.binNames != None: return self.binNames self.binNames = [] if self.outlierBinsFlag: self.binNames.append('...-%s)' % self.lowerLimit) start = self.lowerLimit for binPos in range(1, self.binCount+1): end = self.lowerLimit+binPos*self.binSize self.binNames.append('[%s-%s)]' % (start, end)) start = end if self.outlierBinsFlag: self.binNames.append('[%s-...' % start) return self.binNames def getBin(self, value): """Get the number of the bin this value should belong to. @return the bin number or None if the value is an outlier and outlier bins were not requested. With outliers, bin 0 is the bin with outliers below limit, first normal bin is at index 1.""" if self.outlierBinsFlag: if value < self.lowerLimit: return 0 pos = int((value-self.lowerLimit)/self.binSize) if pos < self.binCount: return pos+1 return self.binCount+1 else: if value < self.lowerLimit: return None pos = int((value-self.lowerLimit)/self.binSize) if pos < self.binCount: return pos return None def getBinPValue(self, binPos, totalValues, binValues): """Calculate a p-Value, how likely the observed number of elements in this bin is. @return the value or None when not applicable.""" if binomialTest is None: return None if self.outlierBinsFlag: if (binPos == 0) or (binPos > self.binCount): return None return binomialTest(binValues, totalValues, self.expectedBinRatio) class ModuloTimeBinDefinition(LinearNumericBinDefinition): """This class defines the module time bins.""" def __init__(self, moduloValue, timeUnit, lowerLimit, binSize, binCount, outlierBinsFlag=False): super(ModuloTimeBinDefinition, self).__init__(lowerLimit, \ binSize, binCount, outlierBinsFlag) self.moduloValue = moduloValue self.timeUnit = timeUnit def getBin(self, value): """Get the number of the bin this value should belong to. @return the bin number or None if the value is an outlier and outlier bins were not requested. With outliers, bin 0 is the bin with outliers below limit, first normal bin is at index 1.""" timeValue = (value[1]%self.moduloValue)/self.timeUnit return super(ModuloTimeBinDefinition, self).getBin(timeValue) class HistogramData(): """This class defines the properties of one histogram to create and performs the accounting and reporting. When the Python scipy package is available, reports will also include probability score created using binomial testing.""" def __init__(self, propertyPath, binDefinition): """Create the histogram data structures. 
@param lowerLimit the lowest value included in the first bin.""" self.propertyPath = propertyPath self.binDefinition = binDefinition self.binNames = binDefinition.getBinNames() self.binData = [0]*(len(self.binNames)) self.hasOutlierBinsFlag = binDefinition.hasOutlierBins() self.totalElements = 0 self.binnedElements = 0 def addValue(self, value): """Add one value to the histogram.""" binPos = self.binDefinition.getBin(value) self.binData[binPos] += 1 self.totalElements += 1 if (self.hasOutlierBinsFlag) and (binPos != 0) and (binPos+1 != len(self.binNames)): self.binnedElements += 1 def reset(self): """Remove all values from this histogram.""" self.totalElements = 0 self.binnedElements = 0 self.binData = [0]*(len(self.binData)) def clone(self): """Clone this object so that calls to addValue do not influence the old object any more. This behavior is a mixture of shallow and deep copy.""" histogramData = HistogramData(self.propertyPath, self.binDefinition) histogramData.binNames = self.binNames histogramData.binData = self.binData[:] histogramData.totalElements = self.totalElements histogramData.binnedElements = self.binnedElements return histogramData def toString(self, indent): """Get a string representation of this histogram.""" result = '%sProperty "%s" (%d elements):' % (indent, self.propertyPath, self.totalElements) fElements = float(self.totalElements) baseElement = self.binnedElements if self.hasOutlierBinsFlag else self.totalElements for binPos in range(0, len(self.binData)): count = self.binData[binPos] if count == 0: continue pValue = self.binDefinition.getBinPValue(binPos, baseElement, count) if pValue is None: result += '\n%s* %s: %d (ratio = %.2e)' % (indent, self.binNames[binPos], \ count, float(count)/fElements) else: result += '\n%s* %s: %d (ratio = %.2e, p = %.2e)' % (indent, \ self.binNames[binPos], count, float(count)/fElements, \ pValue) return result class HistogramAnalysis(AtomHandlerInterface, TimeTriggeredComponentInterface): """This class creates a histogram for one or more properties extracted from a parsed atom.""" def __init__(self, aminerConfig, histogramDefs, reportInterval, reportEventHandlers, resetAfterReportFlag=True, peristenceId='Default'): """Initialize the analysis component. @param histogramDefs is a list of tuples containing the target property path to analyze and the BinDefinition to apply for binning. @param reportInterval delay in seconds between creation of two reports. The parameter is applied to the parsed record data time, not the system time. 
Hence reports can be delayed when no data is received.""" self.lastReportTime = None self.nextReportTime = 0.0 self.histogramData = [] for (path, binDefinition) in histogramDefs: self.histogramData.append(HistogramData(path, binDefinition)) self.reportInterval = reportInterval self.reportEventHandlers = reportEventHandlers self.resetAfterReportFlag = resetAfterReportFlag self.peristenceId = peristenceId self.nextPersistTime = None PersistencyUtil.addPersistableComponent(self) self.persistenceFileName = AMinerConfig.buildPersistenceFileName( aminerConfig, 'HistogramAnalysis', peristenceId) persistenceData = PersistencyUtil.loadJson(self.persistenceFileName) if persistenceData != None: raise Exception('No data reading, def merge yet') def receiveAtom(self, logAtom): matchDict = logAtom.parserMatch.getMatchDictionary() dataUpdatedFlag = False for dataItem in self.histogramData: match = matchDict.get(dataItem.propertyPath, None) if match is None: continue dataUpdatedFlag = True dataItem.addValue(match.matchObject) timestamp = logAtom.getTimestamp() if self.nextReportTime < timestamp: if self.lastReportTime is None: self.lastReportTime = timestamp self.nextReportTime = timestamp+self.reportInterval else: self.sendReport(timestamp) if (self.nextPersistTime is None) and (dataUpdatedFlag): self.nextPersistTime = time.time()+600 def getTimeTriggerClass(self): """Get the trigger class this component should be registered for. This trigger is used only for persistency, so real-time triggering is needed.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """Check current ruleset should be persisted""" if self.nextPersistTime is None: return 600 delta = self.nextPersistTime-triggerTime if delta < 0: self.doPersist() delta = 600 return delta def doPersist(self): """Immediately write persistence data to storage.""" # PersistencyUtil.storeJson(self.persistenceFileName, list(self.knownPathSet)) self.nextPersistTime = None def sendReport(self, timestamp): """Sends a report to the event handlers.""" reportStr = 'Histogram report ' if self.lastReportTime is not None: reportStr += 'from %s ' % self.lastReportTime reportStr += 'till %s' % timestamp for dataItem in self.histogramData: reportStr += '\n'+dataItem.toString(' ') for listener in self.reportEventHandlers: listener.receiveEvent('Analysis.%s' % self.__class__.__name__, 'Histogram report', [], reportStr, self) if self.resetAfterReportFlag: for dataItem in self.histogramData: dataItem.reset() self.lastReportTime = timestamp self.nextReportTime = timestamp+self.reportInterval class PathDependentHistogramAnalysis(AtomHandlerInterface, TimeTriggeredComponentInterface): """This class provides a histogram analysis for only one property but separate histograms for each group of correlated match pathes. Assume there two pathes that include the requested property but they separate after the property was found on the path. Then objects of this class will produce 3 histograms: one for common path part including all occurences of the target property and one for each separate subpath, counting only those property values where the specific subpath was followed.""" def __init__(self, aminerConfig, propertyPath, binDefinition, reportInterval, reportEventHandlers, resetAfterReportFlag=True, peristenceId='Default'): """Initialize the analysis component. @param reportInterval delay in seconds between creation of two reports. The parameter is applied to the parsed record data time, not the system time. 
Hence reports can be delayed when no data is received.""" self.lastReportTime = None self.nextReportTime = 0.0 self.propertyPath = propertyPath self.binDefinition = binDefinition self.histogramData = {} self.reportInterval = reportInterval self.reportEventHandlers = reportEventHandlers self.resetAfterReportFlag = resetAfterReportFlag self.peristenceId = peristenceId self.nextPersistTime = None PersistencyUtil.addPersistableComponent(self) self.persistenceFileName = AMinerConfig.buildPersistenceFileName( aminerConfig, 'PathDependentHistogramAnalysis', peristenceId) persistenceData = PersistencyUtil.loadJson(self.persistenceFileName) if persistenceData is not None: raise Exception('No data reading, def merge yet') def receiveAtom(self, logAtom): matchDict = logAtom.parserMatch.getMatchDictionary() match = matchDict.get(self.propertyPath, None) if match is None: return matchValue = match.matchObject allPathSet = set(matchDict.keys()) unmappedPath = [] missingPathes = set() while allPathSet: path = allPathSet.pop() histogramMapping = self.histogramData.get(path, None) if histogramMapping is None: unmappedPath.append(path) continue # So the path is already mapped to one histogram. See if all pathes # to the given histogram are still in allPathSet. If not, a split # within the mapping is needed. for mappedPath in histogramMapping[0]: try: allPathSet.remove(mappedPath) except: if mappedPath != path: missingPathes.add(mappedPath) if not missingPathes: # Everything OK, just add the value to the mapping. histogramMapping[1].addValue(matchValue) histogramMapping[2] = logAtom.parserMatch else: # We need to split the current set here. Keep the current statistics # for all the missingPathes but clone the data for the remaining # pathes. newHistogram = histogramMapping[1].clone() newHistogram.addValue(matchValue) newPathSet = histogramMapping[0]-missingPathes newHistogramMapping = [newPathSet, newHistogram, logAtom.parserMatch] for mappedPath in newPathSet: self.histogramData[mappedPath] = newHistogramMapping histogramMapping[0] = missingPathes missingPathes = set() if unmappedPath: histogram = HistogramData(self.propertyPath, self.binDefinition) histogram.addValue(matchValue) newRecord = [set(unmappedPath), histogram, logAtom.parserMatch] for path in unmappedPath: self.histogramData[path] = newRecord timestamp = logAtom.getTimestamp() if self.nextReportTime < timestamp: if self.lastReportTime is None: self.lastReportTime = timestamp self.nextReportTime = timestamp+self.reportInterval else: self.sendReport(timestamp) if self.nextPersistTime is None: self.nextPersistTime = time.time()+600 def getTimeTriggerClass(self): """Get the trigger class this component should be registered for. 
This trigger is used only for persistency, so real-time triggering is needed.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """Check current ruleset should be persisted""" if self.nextPersistTime is None: return 600 delta = self.nextPersistTime-triggerTime if delta < 0: self.doPersist() delta = 600 return delta def doPersist(self): """Immediately write persistence data to storage.""" # PersistencyUtil.storeJson(self.persistenceFileName, list(self.knownPathSet)) self.nextPersistTime = None def sendReport(self, timestamp): """Send report to event handlers.""" reportStr = 'Path histogram report ' if self.lastReportTime != None: reportStr += 'from %s ' % self.lastReportTime reportStr += 'till %s' % timestamp allPathSet = set(self.histogramData.keys()) while allPathSet: path = allPathSet.pop() histogramMapping = self.histogramData.get(path) for path in histogramMapping[0]: allPathSet.discard(path) reportStr += '\nPath values "%s":\nExample: %s\n%s' % ( '", "'.join(histogramMapping[0]), histogramMapping[2].matchElement.matchString, histogramMapping[1].toString(' ')) if self.resetAfterReportFlag: histogramMapping[1].reset() for listener in self.reportEventHandlers: listener.receiveEvent('Analysis.%s' % self.__class__.__name__, \ 'Histogram report', [], reportStr, self) self.lastReportTime = timestamp self.nextReportTime = timestamp+self.reportInterval deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/AtomFilters.py0000644000000000000000000001000113352721067026025 0ustar rootroot"""This file collects various classes useful to filter log atoms and pass them to different handlers.""" from aminer.input import AtomHandlerInterface class SubhandlerFilter(AtomHandlerInterface): """Handlers of this class pass the received atoms to one or more subhandlers. Depending on configuration, the atom is passed to all subhandlers or only up to the first suitable to handle the atom.""" def __init__(self, subhandlerList, stopWhenHandledFlag=False): """@param subhandlerList when not None, initialize this filter with the given list of handlers.""" if subhandlerList is None: self.subhandlerList = [] else: if (not isinstance(subhandlerList, list)) or \ (not all(isinstance(handler, AtomHandlerInterface) for handler in subhandlerList)): raise Exception('Only subclasses of AtomHandlerInterface allowed in subhandlerList') self.subhandlerList = [None]*len(subhandlerList) for handlerPos, handlerElement in enumerate(subhandlerList): self.subhandlerList[handlerPos] = (handlerElement, stopWhenHandledFlag) def addHandler(self, atomHandler, stopWhenHandledFlag=False): """Add a handler to the list of handlers.""" self.subhandlerList.append((atomHandler, stopWhenHandledFlag)) def receiveAtom(self, logAtom): """Pass the atom to the subhandlers. @return false when no subhandler was able to handle the atom.""" result = False for handler, stopWhenHandledFlag in self.subhandlerList: handlerResult = handler.receiveAtom(logAtom) if handlerResult is True: result = True if stopWhenHandledFlag: break return result class MatchPathFilter(AtomHandlerInterface): """This class just splits incoming matches according to existance of pathes in the match.""" def __init__(self, parsedAtomHandlerLookupList, defaultParsedAtomHandler): """Initialize the filter. @param parsedAtomHandlerLookupList has to contain tuples with search path string and handler. When the handler is None, the filter will just drop a received atom without forwarding. 
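For illustration, a lookup list could look like this (the paths and handler names are placeholders, not part of this module):

  parsedAtomHandlerLookupList = [
      ('/model/services/cron', cronAtomHandler),  # forward cron atoms to a dedicated handler
      ('/model/services/ntp', None)]              # silently drop ntp atoms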
@param defaultParsedAtomHandler invoke this handler when no handler was found for given match path or do not invoke any handler when None.""" self.parsedAtomHandlerLookupList = parsedAtomHandlerLookupList self.defaultParsedAtomHandler = defaultParsedAtomHandler def receiveAtom(self, logAtom): """Receive an atom and pass it to the subhandlers. @return False when logAtom did not contain match data or was not forwarded to any handler, True otherwise.""" if logAtom.parserMatch is None: return False matchDict = logAtom.parserMatch.getMatchDictionary() for pathName, targetHandler in self.parsedAtomHandlerLookupList: if pathName in matchDict: if targetHandler is not None: targetHandler.receiveAtom(logAtom) return True if self.defaultParsedAtomHandler is None: return False self.defaultParsedAtomHandler.receiveAtom(logAtom) return True class MatchValueFilter(AtomHandlerInterface): """This class just splits incoming matches using a given match value and forward them to different handlers.""" def __init__(self, targetPath, parsedAtomHandlerDict, defaultParsedAtomHandler): """Initialize the splitter. @param defaultParsedAtomHandler invoke this default handler when no value handler was found or do not invoke any handler when None.""" self.targetPath = targetPath self.parsedAtomHandlerDict = parsedAtomHandlerDict self.defaultParsedAtomHandler = defaultParsedAtomHandler def receiveAtom(self, logAtom): if logAtom.parserMatch is None: return False targetValue = logAtom.parserMatch.getMatchDictionary().get(self.targetPath, None) if targetValue is not None: targetValue = targetValue.matchObject targetHandler = self.parsedAtomHandlerDict.get(targetValue, \ self.defaultParsedAtomHandler) if targetHandler is None: return False targetHandler.receiveAtom(logAtom) return True deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/WhitelistViolationDetector.py0000600000000000000000000000244013346662174031135 0ustar rootroot"""This module defines a detector for log atoms not matching any whitelisted rule.""" from aminer.input import AtomHandlerInterface class WhitelistViolationDetector(AtomHandlerInterface): """Objects of this class handle a list of whitelist rules to ensure, that each received log-atom is at least covered by a single whitelist rule. To avoid traversing the complete rule tree more than once, the whitelist rules may have match actions attached that set off an alarm by themselves.""" def __init__(self, whitelistRules, anomalyEventHandlers): """Initialize the detector. @param whitelistRules list of rules executed in same way as inside Rules.OrMatchRule.""" self.whitelistRules = whitelistRules self.anomalyEventHandlers = anomalyEventHandlers def receiveAtom(self, logAtom): """Receive on parsed atom and the information about the parser match. 
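A typical wiring sketch (the rule list is only hinted at; handler and filter names are placeholders):

  whitelistRules = [...]  # rules built from aminer.analysis.Rules, e.g. an OrMatchRule tree
  violationDetector = WhitelistViolationDetector(whitelistRules, anomalyEventHandlers)
  atomFilter.addHandler(violationDetector)

Each received atom is checked against the rules in order; the first matching rule whitelists it, otherwise all anomaly event handlers are informed.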
@param logAtom atom with parsed data to check @return True when logAtom is whitelisted, False otherwise.""" for rule in self.whitelistRules: if rule.match(logAtom): return True for listener in self.anomalyEventHandlers: listener.receiveEvent('Analysis.%s' % self.__class__.__name__, \ 'No whitelisting for current atom', [logAtom.rawData], logAtom, self) return False deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/NewMatchPathDetector.py0000600000000000000000000000743613346662316027627 0ustar rootroot"""This module defines a detector for new data paths.""" import time from aminer import AMinerConfig from aminer.AnalysisChild import AnalysisContext from aminer.events import EventSourceInterface from aminer.input import AtomHandlerInterface from aminer.util import TimeTriggeredComponentInterface from aminer.util import PersistencyUtil class NewMatchPathDetector(AtomHandlerInterface, \ TimeTriggeredComponentInterface, EventSourceInterface): """This class creates events when new data path was found in a parsed atom.""" def __init__(self, aminerConfig, anomalyEventHandlers, \ peristenceId='Default', autoIncludeFlag=False): """Initialize the detector. This will also trigger reading or creation of persistence storage location.""" self.anomalyEventHandlers = anomalyEventHandlers self.autoIncludeFlag = autoIncludeFlag self.nextPersistTime = None PersistencyUtil.addPersistableComponent(self) self.persistenceFileName = AMinerConfig.buildPersistenceFileName( aminerConfig, self.__class__.__name__, peristenceId) persistenceData = PersistencyUtil.loadJson(self.persistenceFileName) if persistenceData is None: self.knownPathSet = set() else: self.knownPathSet = set(persistenceData) def receiveAtom(self, logAtom): """Receive on parsed atom and the information about the parser match. @param logAtom the parsed log atom @return True if this handler was really able to handle and process the match. Depending on this information, the caller may decide if it makes sense passing the parsed atom also to other handlers.""" unknownPathList = [] for path in logAtom.parserMatch.getMatchDictionary().keys(): if path not in self.knownPathSet: unknownPathList.append(path) if self.autoIncludeFlag: self.knownPathSet.add(path) if unknownPathList: if self.nextPersistTime is None: self.nextPersistTime = time.time()+600 for listener in self.anomalyEventHandlers: listener.receiveEvent('Analysis.%s' % self.__class__.__name__, \ 'New path(es) %s ' % (', '.join(unknownPathList)), \ [logAtom.rawData], [logAtom, unknownPathList], self) return True def getTimeTriggerClass(self): """Get the trigger class this component can be registered for. This detector only needs persisteny triggers in real time.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """Check current ruleset should be persisted""" if self.nextPersistTime is None: return 600 delta = self.nextPersistTime-triggerTime if delta <= 0: self.doPersist() delta = 600 return delta def doPersist(self): """Immediately write persistence data to storage.""" PersistencyUtil.storeJson(self.persistenceFileName, list(self.knownPathSet)) self.nextPersistTime = None def whitelistEvent(self, eventType, sortedLogLines, eventData, \ whitelistingData): """Whitelist an event generated by this source using the information emitted when generating the event. 
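For example (the path name is made up): for an event that was generated with eventData of the form [logAtom, ['/model/user/name']], calling this method with whitelistingData set to None adds '/model/user/name' to the set of known paths, so it will not be reported again.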
@return a message with information about whitelisting @throws Exception when whitelisting of this special event using given whitelistingData was not possible.""" if eventType != 'Analysis.%s' % self.__class__.__name__: raise Exception('Event not from this source') if whitelistingData is not None: raise Exception('Whitelisting data not understood by this detector') whitelistedStr = '' for pathName in eventData[1]: if pathName in self.knownPathSet: continue self.knownPathSet.add(pathName) if whitelistedStr: whitelistedStr += ', ' whitelistedStr += repr(pathName) return 'Whitelisted path(es) %s in %s' % (whitelistedStr, sortedLogLines[0]) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/MatchValueStreamWriter.py0000600000000000000000000000362513346663121030203 0ustar rootroot"""This module dfines a writer that forwards match information to a stream.""" from aminer.AnalysisChild import AnalysisContext from aminer.input import AtomHandlerInterface from aminer.util import TimeTriggeredComponentInterface class MatchValueStreamWriter(AtomHandlerInterface, TimeTriggeredComponentInterface): """This class extracts values from a given match and writes them to a stream. This can be used to forward these values to another program (when stream is a wrapped network socket) or to a file for further analysis. A stream is used instead of a file descriptor to increase performance. To flush it from time to time, add the writer object also to the time trigger list.""" def __init__(self, stream, matchValuePathList, separatorString, missingValueString): """Initialize the writer.""" self.stream = stream self.matchValuePathList = matchValuePathList self.separatorString = separatorString self.missingValueString = missingValueString def receiveAtom(self, logAtom): """Forward match value information to the stream.""" matchDict = logAtom.parserMatch.getMatchDictionary() addSepFlag = False result = b'' for path in self.matchValuePathList: if addSepFlag: result += self.separatorString match = matchDict.get(path, None) if match is None: result += self.missingValueString else: result += match.matchString addSepFlag = True self.stream.write(result) self.stream.write('\n') def getTimeTriggerClass(self): """Get the trigger class this component should be registered for. This trigger is used only for persistency, so real-time triggering is needed.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """Flush the timer.""" self.stream.flush() return 10 def doPersist(self): """Flush the timer.""" self.stream.flush() deb-build/root/usr/lib/logdata-anomaly-miner/aminer/analysis/TimestampsUnsortedDetector.py0000600000000000000000000000407213330052277031137 0ustar rootroot"""This module defines a detector for unsorted timestamps.""" from aminer.events import EventSourceInterface from aminer.input import AtomHandlerInterface class TimestampsUnsortedDetector(AtomHandlerInterface, EventSourceInterface): """This class creates events when unsorted timestamps are detected. This is useful mostly to detect algorithm malfunction or configuration errors, e.g. invalid timezone configuration.""" def __init__(self, anomalyEventHandlers, exitOnErrorFlag=False): """Initialize the detector.""" self.anomalyEventHandlers = anomalyEventHandlers self.lastTimestamp = 0 self.exitOnErrorFlag = exitOnErrorFlag def receiveAtom(self, logAtom): """Receive on parsed atom and the information about the parser match. 
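For example (the timestamps are made up): after an atom with timestamp 1500000100 was processed, a following atom with timestamp 1500000050 triggers a 'Timestamp ... below ...' event; with exitOnErrorFlag set, the process terminates immediately afterwards.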
@param logAtom the parsed log atom @return True if this handler was really able to handle and process the match. Depending on this information, the caller may decide if it makes sense passing the parsed atom also to other handlers.""" timestamp = logAtom.getTimestamp() if timestamp is None: return False if timestamp < self.lastTimestamp: for listener in self.anomalyEventHandlers: listener.receiveEvent('Analysis.%s' % self.__class__.__name__, \ 'Timestamp %s below %s ' % (timestamp, self.lastTimestamp), \ [logAtom.rawData], [logAtom], self) if self.exitOnErrorFlag: import sys sys.exit(1) self.lastTimestamp = timestamp return True def whitelistEvent(self, eventType, sortedLogLines, eventData, whitelistingData): """Whitelist an event generated by this source using the information emitted when generating the event. @return a message with information about whitelisting @throws Exception when whitelisting of this special event using given whitelistingData was not possible.""" if eventType != 'Analysis.%s' % self.__class__.__name__: raise Exception('Event not from this source') raise Exception('No whitelisting for algorithm malfunction or configuration errors') deb-build/root/usr/lib/logdata-anomaly-miner/aminer/util/0000755000000000000000000000000013354626162022366 5ustar rootrootdeb-build/root/usr/lib/logdata-anomaly-miner/aminer/util/__init__.py0000600000000000000000000001407313354625400024466 0ustar rootroot"""This module contains various methods and class definitions useful for various components from parsing, analysis and event handling. Larger separate blocks of code should be split into own subfiles or submodules, e.g. persistency.""" import random from aminer.input import AtomHandlerInterface def getLogInt(maxBits): """Get a log-distributed random integer integer in range 0 to maxBits-1.""" randBits = random.randint(0, (1 << maxBits)-1) result = 0 while (randBits&1) != 0: result += 1 randBits >>= 1 return result def decodeStringAsByteString(string): """Decodes a string produced by the encode function encodeByteStringAsString(byteString) below. @return string.""" decoded = b'' count = 0 while count < len(string): if string[count] in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS' \ 'TUVWXYZ1234567890!"#$&\'()*+,-./:;<=>?@[]\\^_`' \ '{}|~ ': decoded += bytes(string[count], 'ascii') count += 1 elif string[count] == '%': decoded += bytearray((int(string[count+1:count+3], 16),)) count += 3 else: raise Exception('Invalid encoded character') return decoded def encodeByteStringAsString(byteString): """Encodes an arbitrary byte string to a string by replacing all non ascii-7 bytes and all non printable ascii-7 bytes and % character by replacing with their escape sequence %[hex]. For example byte string b'/\xc3' is encoded to '/%c3' @return a string with decoded name.""" encoded = '' for byte in byteString: if byte in b'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS' \ b'TUVWXYZ1234567890!"#$&\'()*+,-./:;<=>?@[]\\^_`' \ b'{}|~ ': encoded += chr(byte) else: encoded += '%%%02x' % byte return encoded class ObjectHistory(object): """This is the superinterface of all object histories. The idea behind that is to use that type of history best suited for a purpose considering amount of data, possibility for history size limits to be reached, priorization which elements should be dropped first.""" def addObject(self, newObject): """Add an object to this history. 
This method call may evict other objects from the history.""" raise Exception('Interface method called') def getHistory(self): """Get the whole history list. Make sure to clone the list before modification when influences on this object are not intended.""" raise Exception('Interface method called') def clearHistory(self): """Clean the whole history.""" raise Exception('Interface method called') class LogarithmicBackoffHistory(ObjectHistory): """This class keeps a history list of items with logarithmic storage characteristics. When adding objects, the list will be filled to the maximum size with the newest items at the end. When filled, adding a new element will move with probability 1/2 the last element to the next lower position before putting the new item to the end position. With a chance of 1/4, the last 2 elements are moved, with 1/8 the last 3, ... Thus the list will in average span a time range of 2^maxItems items with growing size of holes towards the earliest element.""" def __init__(self, maxItems, initialList=None): self.maxItems = maxItems if initialList is None: initialList = [] else: initialList = initialList[:maxItems] self.history = initialList def addObject(self, newObject): """Add a new object to the list according to the rules described in the class docstring.""" if len(self.history) < self.maxItems: self.history.append(newObject) else: movePos = getLogInt(self.maxItems) if movePos != 0: self.history = self.history[:self.maxItems-movePos]+ \ self.history[self.maxItems+1-movePos:]+[newObject] else: self.history[-1] = newObject def getHistory(self): """Get the whole history list. Make sure to clone the list before modification when influences on this object are not intended.""" return self.history def clearHistory(self): """Clean the whole history.""" self.history[:] = [] class TimeTriggeredComponentInterface(object): """This is the common interface of all components that can be registered to receive timer interrupts. There might be different timelines for triggering, real time and normalized log data time scale for forensic analysis. For forensic analyis different timers might be available to register a component. Therefore the component should state, which type of triggering it would require.""" def getTimeTriggerClass(self): """Get the trigger class this component can be registered for. See AnalysisContext class for different trigger classes available.""" raise Exception('Interface method called') def doTimer(self, triggerTime): """This method is called to perform trigger actions and to determine the time for next invocation. The caller may decide to invoke this method earlier than requested during the previous call. Classes implementing this method have to handle such cases. Each class should try to limit the time spent in this method as it might delay trigger signals to other components. For extensive compuational work or IO, a separate thread should be used. @param triggerTime the time this trigger is invoked. This might be the current real time when invoked from real time timers or the forensic log timescale time value. @return the number of seconds when next invocation of this trigger is required.""" raise Exception('Interface method called') class VolatileLogarithmicBackoffAtomHistory(AtomHandlerInterface, LogarithmicBackoffHistory): """This class is a volatile filter to keep a history of log atoms, e.g. 
for analysis by other components or for external access via remote control interface.""" def __init__(self, maxItems): """Initialize the history component.""" LogarithmicBackoffHistory.__init__(self, maxItems) def receiveAtom(self, logAtom): """Receive an atom and add it to the history log.""" self.addObject(logAtom) return True deb-build/root/usr/lib/logdata-anomaly-miner/aminer/util/JsonUtil.py0000644000000000000000000000344013354625572024514 0ustar rootroot"""This module converts json strings to object structures also supporting byte array structures.""" import json from aminer.util import encodeByteStringAsString, decodeStringAsByteString def dumpAsJson(inputObject): """Dump an input object encoded as string""" return json.dumps(encodeObject(inputObject)) def loadJson(inputString): """Load an string encoded as object structure""" return decodeObject(json.loads(inputString)) def encodeObject(term): """@param encodedObject return an object encoded as string""" encodedObject = '' if isinstance(term, str): encodedObject = 'string:' + term elif isinstance(term, bytes): encodedObject = 'bytes:' + encodeByteStringAsString(term) elif isinstance(term, (list, tuple)): encodedObject = [encodeObject(item) for item in term] elif isinstance(term, dict): encodedObject = {} for key, var in term.items(): key = encodeObject(key) var = encodeObject(var) encodedObject[key] = var elif isinstance(term, (bool, int, None)): encodedObject = term else: raise Exception('Unencodeable object %s' % type(term)) return encodedObject def decodeObject(term): """@param decodedObject return a string decoded as object structure""" decodedObject = '' if isinstance(term, str) and term.startswith('string:'): decodedObject = term[7:] elif isinstance(term, str) and term.startswith('bytes:'): decodedObject = term[6:] decodedObject = decodeStringAsByteString(decodedObject) elif isinstance(term, list): decodedObject = [decodeObject(item) for item in term] elif isinstance(term, dict): decodedObject = {} for key, var in term.items(): key = decodeObject(key) var = decodeObject(var) decodedObject[key] = var else: decodedObject = term return decodedObject deb-build/root/usr/lib/logdata-anomaly-miner/aminer/util/PersistencyUtil.py0000600000000000000000000000672113352650503026076 0ustar rootroot"""This module defines functions for reading and writing files in a secure way.""" import errno import os import sys import time from aminer.util import SecureOSFunctions from aminer.util import JsonUtil # Have a registry of all persistable components. Those might be # happy to be invoked before python process is terminating. persistableComponents = [] def addPersistableComponent(component): """Add a component to the registry of all persistable components.""" persistableComponents.append(component) def openPersistenceFile(fileName, flags): """This function opens the given persistence file. When O_CREAT was specified, the function will attempt to create the directories too.""" if isinstance(fileName, str): fileName = fileName.encode() try: fd = SecureOSFunctions.secureOpenFile(fileName, flags) return fd except OSError as openOsError: if ((flags&os.O_CREAT) == 0) or (openOsError.errno != errno.ENOENT): raise openOsError # Find out, which directory is missing by stating our way up. 
dirNameLength = fileName.rfind(b'/') if dirNameLength > 0: os.makedirs(fileName[:dirNameLength]) return SecureOSFunctions.secureOpenFile(fileName, flags) def createTemporaryPersistenceFile(fileName): """Create a temporary file within persistence directory to write new persistence data to it. Thus the old data is not modified, any error creating or writing the file will not harm the old state.""" fd = None while True: # FIXME: This should use O_TMPFILE, but not yet available. That would # obsolete the loop also. fd = openPersistenceFile('%s.tmp-%f' % (fileName, time.time()), \ os.O_WRONLY|os.O_CREAT|os.O_EXCL) break return fd noSecureLinkUnlinkAtWarnOnceFlag = True def replacePersistenceFile(fileName, newFileHandle): """Replace the named file with the file refered by the handle.""" global noSecureLinkUnlinkAtWarnOnceFlag if noSecureLinkUnlinkAtWarnOnceFlag: print('WARNING: SECURITY: unsafe unlink (unavailable unlinkat/linkat should be used, but \ not available in python)', file=sys.stderr) noSecureLinkUnlinkAtWarnOnceFlag = False try: os.unlink(fileName) except OSError as openOsError: if openOsError.errno != errno.ENOENT: raise openOsError tmpFileName = os.readlink('/proc/self/fd/%d' % newFileHandle) os.link(tmpFileName, fileName) os.unlink(tmpFileName) def persistAll(): """Persist all persistable components in the registry.""" for component in persistableComponents: component.doPersist() def loadJson(fileName): """Load persistency data from file. @return None if file did not yet exist.""" persistenceData = None try: persistenceFileHandle = openPersistenceFile(fileName, os.O_RDONLY|os.O_NOFOLLOW) persistenceData = os.read(persistenceFileHandle, os.fstat(persistenceFileHandle).st_size) persistenceData = str(persistenceData, 'utf-8') os.close(persistenceFileHandle) except OSError as openOsError: if openOsError.errno != errno.ENOENT: raise openOsError return None result = None try: result = JsonUtil.loadJson(persistenceData) except ValueError as valueError: raise Exception('Corrupted data in %s' % fileName, valueError) return result def storeJson(fileName, objectData): """Store persistency data to file.""" persistenceData = JsonUtil.dumpAsJson(objectData) fd = createTemporaryPersistenceFile(fileName) os.write(fd, bytes(persistenceData, 'utf-8')) replacePersistenceFile(fileName, fd) os.close(fd) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/util/SecureOSFunctions.py0000600000000000000000000001170313354626162026313 0ustar rootroot"""This module defines functions for secure file handling.""" import os import socket import struct import sys # Those should go away as soon as Python (or aminer via libc) # provides those functions. noSecureOpenWarnOnceFlag = True def secureOpenFile(fileName, flags, trustedRoot='/'): """Secure opening of a file with given flags. This call will refuse to open files where any path component is a symlink. As operating system does not provide any means to do that, open the fileName directory by directory. It also adds O_NOCTTY to the flags as controlling TTY logics as this is just an additional risk and does not make sense for opening of log files. 
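A typical call could look like this (the path is only an example):

  logFileFd = secureOpenFile(b'/var/log/auth.log', os.O_RDONLY)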
@param fileName is the fileName as byte string @param trustedRoot Opening this directory is deemed safe by default.""" if not fileName.startswith(b'/'): raise Exception('Secure open on relative path not supported') if (fileName.endswith(b'/')) and ((flags&os.O_DIRECTORY) == 0): raise Exception('Opening directory but O_DIRECTORY flag missing') # This code would allow secure open but openat is not available # in python2 series. A long way to go, but keep it here for the # python3 port to come. # if trustedRoot=='/': # fileName = fileName[1:] # else: # if (not fileName.startswith(trustedRoot)) or (fileName[len(trustedRoot)] != '/'): # raise Exception('File name not within trusted root') # fileName = fileName[len(trustedRoot)+1:] # # dirFd = os.open(trustedRoot, os.O_RDONLY|os.O_DIRECTORY|os.O_NOFOLLOW|os.O_NOCTTY) # lastPathPart = None # Open all path parts excluding the last one only as directory. # This will prevent us from opening something unexpected if a # user would move around directories while traversing. # for part in fileName.split['/']: # if len(part)==0: continue # if lastPathPart is not None: # nextFd = os.openat(dirFd, os.O_RDONLY|os.O_DIRECTORY|os.O_NOFOLLOW|os.O_NOCTTY) # os.close(dirFd) # dirFd = nextFd # lastPathPart = part # if lastPathPart is None: lastPathPart = '.' # result = os.openat(dirFd, lastPathPart, flags|os.O_NOFOLLOW|os.O_NOCTTY) # os.close(dirFd) # return(result) global noSecureOpenWarnOnceFlag if noSecureOpenWarnOnceFlag: print('WARNING: SECURITY: No secure open yet due to missing openat in python!', file=sys.stderr) noSecureOpenWarnOnceFlag = False return os.open(fileName, flags|os.O_NOFOLLOW|os.O_NOCTTY) def sendAnnotatedFileDescriptor(sendSocket, sendFd, typeInfo, annotationData): """Send file descriptor and associated annotation data via SCM_RIGHTS. @param typeInfo has to be a null-byte free string to inform the receiver how to handle the file descriptor and how to interpret the annotationData. @param annotationData this optional byte array may convey additional information about the file descriptor.""" # Construct the message data first if isinstance(typeInfo, str): typeInfo = typeInfo.encode() if isinstance(annotationData, str): annotationData = annotationData.encode() if typeInfo.find(b'\x00') >= 0: raise Exception('Null bytes not supported in typeInfo') messageData = b'%s\x00%s' % (typeInfo, annotationData) sendSocket.sendmsg( [messageData], [(socket.SOL_SOCKET, socket.SCM_RIGHTS, struct.pack('i', sendFd))]) def sendLogstreamDescriptor(sendSocket, sendFd, sendFileName): """Send a file descriptor to be used as standard log data stream source for the analysis pipeline.""" sendAnnotatedFileDescriptor(sendSocket, sendFd, b'logstream', sendFileName) def receiveAnnotedFileDescriptor(receiveSocket): """Receive a single file descriptor and attached annotation information via SCM_RIGHTS via the given socket. The method may raise an Exception when invoked on non-blocking sockets and no messages available. 
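The counterpart on the sending side is sendLogstreamDescriptor or sendAnnotatedFileDescriptor. A sketch of both ends of a connected AF_UNIX socket pair (the file name is made up):

  sendLogstreamDescriptor(parentSocket, logFileFd, b'file:///var/log/auth.log')
  (receivedFd, typeInfo, annotationData) = receiveAnnotedFileDescriptor(childSocket)
  # typeInfo is b'logstream', annotationData is b'file:///var/log/auth.log'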
@return a tuple containing the received file descriptor, type information (see sendAnnotatedFileDescriptor) and the annotation information.""" messageData, ancData, flags, remoteAddress = receiveSocket.recvmsg( 1<<16, socket.CMSG_LEN(struct.calcsize('i'))) if len(ancData) != 1: raise Exception( 'Received %d sets of ancillary data instead of 1' % len(ancData)) cmsg_level, cmsg_type, cmsg_data = ancData[0] if (cmsg_level != socket.SOL_SOCKET) or (cmsg_type != socket.SCM_RIGHTS): raise Exception('Received invalid message from remote side') # Do not accept multiple or unaligned FDs. if len(cmsg_data) != 4: raise Exception( 'Unsupported control message length %d' % len(cmsg_data)) receivedFd = struct.unpack('i', cmsg_data)[0] splitPos = messageData.find(b'\x00') if splitPos < 0: raise Exception('No null byte in received message') typeInfo = messageData[:splitPos] annotationData = messageData[splitPos+1:] if receivedFd <= 2: print('WARNING: received "reserved" fd %d' % receivedFd, file=sys.stderr) if isinstance(typeInfo, str): typeInfo = typeInfo.encode() if isinstance(annotationData, str): annotationData = annotationData.encode() return(receivedFd, typeInfo, annotationData) deb-build/root/usr/lib/logdata-anomaly-miner/aminer/AnalysisChild.py0000600000000000000000000006521213354626364024514 0ustar rootroot"""This module contains classes for execution of AMiner child process main analysis loop.""" import base64 import errno import fcntl import json import os import select import socket import struct import sys import time import traceback from aminer import AMinerConfig from aminer.input.LogStream import LogStream from aminer.util import PersistencyUtil from aminer.util import SecureOSFunctions from aminer.util import TimeTriggeredComponentInterface from aminer.util import JsonUtil class AnalysisContext(object): """This class collects information about the current analysis context to access it during analysis or remote management.""" TIME_TRIGGER_CLASS_REALTIME = 1 TIME_TRIGGER_CLASS_ANALYSISTIME = 2 def __init__(self, aminerConfig): self.aminerConfig = aminerConfig # This is the factory to create atomiziers for incoming data streams # and link them to the analysis pipeline. self.atomizerFactory = None # This is the current log processing and analysis time regarding # the data stream being analyzed. While None, the analysis time # e.g. used to trigger components (see analysisTimeTriggeredComponents), # is the same as current system time. For forensic analysis this # time has to be updated to values derived from the log data input # to reflect the current log processing time, which will be in # the past and may progress much faster than real system time. self.analysisTime = None # Keep a registry of all analysis and filter configuration for # later use. Remote control interface may then access them for # runtime reconfiguration. self.nextRegistryId = 0 self.registeredComponents = {} # Keep also a list of components by name. self.registeredComponentsByName = {} # Keep lists of components that should receive timer interrupts # when real time or analysis time has elapsed. 
self.realTimeTriggeredComponents = [] self.analysisTimeTriggeredComponents = [] def addTimeTriggeredComponent(self, component, triggerClass=None): """Add a time-triggered component to the registry.""" if not isinstance(component, TimeTriggeredComponentInterface): raise Exception('Attempting to register component of class ' \ '%s not implementing aminer.util.TimeTriggeredComponentInterface' % ( component.__class__.__name__)) if triggerClass is None: triggerClass = component.getTimeTriggerClass() if triggerClass == AnalysisContext.TIME_TRIGGER_CLASS_REALTIME: self.realTimeTriggeredComponents.append(component) elif triggerClass == AnalysisContext.TIME_TRIGGER_CLASS_ANALYSISTIME: self.analysisTimeTriggeredComponents.append(component) else: raise Exception('Attempting to timer component for unknown class %s' % triggerClass) def registerComponent( self, component, componentName=None, registerTimeTriggerClassOverride=None): """Register a new component. A component implementing the TimeTriggeredComponentInterface will also be added to the appropriate lists unless registerTimeTriggerClassOverride is specified. @param componentName when not none, the component is also added to the named components. When a component with the same name was already registered, this will cause an error. @param registerTimeTriggerClassOverride if not none, ignore the time trigger class supplied by the component and register it for the classes specified in the override list. Use an empty list to disable registration.""" if (componentName != None) and (componentName in self.registeredComponentsByName): raise Exception('Component with same name already registered') if (registerTimeTriggerClassOverride != None) and \ (not isinstance(component, TimeTriggeredComponentInterface)): raise Exception('Requesting override on component not implementing ' \ 'TimeTriggeredComponentInterface') self.registeredComponents[self.nextRegistryId] = (component, componentName) self.nextRegistryId += 1 if componentName != None: self.registeredComponentsByName[componentName] = component if isinstance(component, TimeTriggeredComponentInterface): if registerTimeTriggerClassOverride is None: self.addTimeTriggeredComponent(component) else: for triggerClass in registerTimeTriggerClassOverride: self.addTimeTriggeredComponent(component, triggerClass) def getRegisteredComponentIds(self): """Get a list of currently known component IDs.""" return self.registeredComponents.keys() def getComponentById(self, idString): """Get a component by ID. @return None if not found.""" componentInfo = self.registeredComponents.get(idString, None) if componentInfo is None: return None return componentInfo[0] def getRegisteredComponentNames(self): """Get a list of currently known component names.""" return list(self.registeredComponentsByName.keys()) def getComponentByName(self, name): """Get a component by name. @return None if not found.""" return self.registeredComponentsByName.get(name, None) def buildAnalysisPipeline(self): """Convenience method to create the pipeline.""" self.aminerConfig.buildAnalysisPipeline(self) class AnalysisChild(TimeTriggeredComponentInterface): """This class defines the child performing the complete analysis workflow. 
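In normal operation the analysis process creates exactly one instance and enters the main loop roughly like this (a sketch; the forking and socket setup performed by the parent are omitted):

  child = AnalysisChild(programName, aminerConfig)
  sys.exit(child.runAnalysis(masterFd))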
When splitting privileges between analysis and monitor process, this class should only be initialized within the analysis process!""" def __init__(self, programName, aminerConfig): self.programName = programName self.analysisContext = AnalysisContext(aminerConfig) self.runAnalysisLoopFlag = True self.logStreamsByName = {} self.persistenceFileName = AMinerConfig.buildPersistenceFileName( self.analysisContext.aminerConfig, self.__class__.__name__+'/RepositioningData') self.nextPersistTime = time.time()+600 self.repositioningDataDict = {} self.masterControlSocket = None self.remoteControlSocket = None # This dictionary provides a lookup list from file descriptor # to associated object for handling the data to and from the given # descriptor. Currently supported handler objects are: # * Parent process socket # * Remote control listening socket # * LogStreams # * Remote control connections self.trackedFdsDict = {} # Override the signal handler to allow graceful shutdown. def gracefulShutdownHandler(_signo, _stackFrame): """This is the signal handler function to react on typical shutdown signals.""" print('%s: caught signal, shutting down' % programName, file=sys.stderr) self.runAnalysisLoopFlag = False import signal signal.signal(signal.SIGHUP, gracefulShutdownHandler) signal.signal(signal.SIGINT, gracefulShutdownHandler) signal.signal(signal.SIGTERM, gracefulShutdownHandler) # Do this on at the end of the initialization to avoid having # partially initialized objects inside the registry. self.analysisContext.addTimeTriggeredComponent(self) def runAnalysis(self, masterFd): """This method runs the analysis thread. @param masterFd the main communication socket to the parent to receive logfile updates from the parent. @return 0 on success, e.g. normal termination via signal or 1 on error.""" # The masterControlSocket is the socket to communicate with the # master process to receive commands or logstream data. Expect # the parent/child communication socket on fd 3. This also duplicates # the fd, so close the old one. self.masterControlSocket = socket.fromfd( masterFd, socket.AF_UNIX, socket.SOCK_DGRAM, 0) os.close(masterFd) self.trackedFdsDict[self.masterControlSocket.fileno()] = \ self.masterControlSocket # Locate the real analysis configuration. self.analysisContext.buildAnalysisPipeline() if self.analysisContext.atomizerFactory is None: print('FATAL: buildAnalysisPipeline() did ' \ 'not initialize atomizerFactory, terminating', file=sys.stderr) return 1 realTimeTriggeredComponents = self.analysisContext.realTimeTriggeredComponents analysisTimeTriggeredComponents = self.analysisContext.analysisTimeTriggeredComponents # Load continuation data for last known log streams. The loaded # data has to be a dictionary with repositioning information for # each stream. The data is used only when creating the first stream # with that name. self.repositioningDataDict = PersistencyUtil.loadJson( self.persistenceFileName) if self.repositioningDataDict is None: self.repositioningDataDict = {} # A list of LogStreams where handleStream() blocked due to downstream # not being able to consume the data yet. blockedLogStreams = [] # Always start when number is None. nextRealTimeTriggerTime = None nextAnalysisTimeTriggerTime = None delayedReturnStatus = 0 while self.runAnalysisLoopFlag: # Build the list of inputs to select for anew each time: the LogStream # file descriptors may change due to rollover. 
inputSelectFdList = [] outputSelectFdList = [] for fdHandlerObject in self.trackedFdsDict.values(): if isinstance(fdHandlerObject, LogStream): streamFd = fdHandlerObject.getCurrentFd() if streamFd < 0: continue inputSelectFdList.append(streamFd) elif isinstance(fdHandlerObject, AnalysisChildRemoteControlHandler): fdHandlerObject.addSelectFds( inputSelectFdList, outputSelectFdList) else: # This has to be a socket, just add the file descriptor. inputSelectFdList.append(fdHandlerObject.fileno()) # Loop over the list in reverse order to avoid skipping elements # in remove. for logStream in reversed(blockedLogStreams): currentStreamFd = logStream.handleStream() if currentStreamFd >= 0: self.trackedFdsDict[currentStreamFd] = logStream inputSelectFdList.append(currentStreamFd) blockedLogStreams.remove(logStream) readList = None writeList = None exceptList = None try: (readList, writeList, exceptList) = select.select( inputSelectFdList, outputSelectFdList, [], 1) except select.error as selectError: # Interrupting signals, e.g. for shutdown are OK. if selectError[0] == errno.EINTR: continue print('Unexpected select result %s' % str(selectError), file=sys.stderr) delayedReturnStatus = 1 break for readFd in readList: fdHandlerObject = self.trackedFdsDict[readFd] if isinstance(fdHandlerObject, LogStream): # Handle this LogStream. Only when downstream processing blocks, # add the stream to the blocked stream list. handleResult = fdHandlerObject.handleStream() if handleResult < 0: # No need to care if current internal file descriptor in LogStream # has changed in handleStream(), this will be handled when unblocking. del self.trackedFdsDict[readFd] blockedLogStreams.append(fdHandlerObject) elif handleResult != readFd: # The current fd has changed, update the tracking list. del self.trackedFdsDict[readFd] self.trackedFdsDict[handleResult] = fdHandlerObject continue if isinstance(fdHandlerObject, AnalysisChildRemoteControlHandler): try: fdHandlerObject.doReceive() except Exception as receiveException: print('Unclean termination of remote ' \ 'control: %s' % str(receiveException), file=sys.stderr) if fdHandlerObject.isDead(): del self.trackedFdsDict[readFd] # Reading is only attempted when output buffer was already flushed. # Try processing the next request to fill the output buffer for # next round. else: fdHandlerObject.doProcess(self.analysisContext) continue if fdHandlerObject == self.masterControlSocket: self.handleMasterControlSocketReceive() continue if fdHandlerObject == self.remoteControlSocket: # We received a remote connection, accept it unconditionally. # Users should make sure, that they do not exhaust resources by # hogging open connections. (controlClientSocket, remoteAddress) = \ self.remoteControlSocket.accept() # Keep track of information received via this remote control socket. 
remoteControlHandler = AnalysisChildRemoteControlHandler( controlClientSocket) self.trackedFdsDict[controlClientSocket.fileno()] = remoteControlHandler continue raise Exception('Unhandled object type %s' % type(fdHandlerObject)) for writeFd in writeList: fdHandlerObject = self.trackedFdsDict[writeFd] if isinstance(fdHandlerObject, AnalysisChildRemoteControlHandler): bufferFlushedFlag = False try: bufferFlushedFlag = fdHandlerObject.doSend() except OSError as sendError: print('Error sending data via remote ' \ 'control: %s' % str(sendError), file=sys.stderr) try: fdHandlerObject.terminate() except Exception as terminateException: print('Unclean termination of remote ' \ 'control: %s' % str(terminateException), file=sys.stderr) if bufferFlushedFlag: fdHandlerObject.doProcess(self.analysisContext) if fdHandlerObject.isDead(): del self.trackedFdsDict[writeFd] continue raise Exception('Unhandled object type %s' % type(fdHandlerObject)) # Handle the real time events. realTime = time.time() if nextRealTimeTriggerTime is None or realTime >= nextRealTimeTriggerTime: nextTriggerOffset = 3600 for component in realTimeTriggeredComponents: nextTriggerRequest = component.doTimer(realTime) nextTriggerOffset = min(nextTriggerOffset, nextTriggerRequest) nextRealTimeTriggerTime = realTime+nextTriggerOffset # Handle the analysis time events. The analysis time will be different # when an analysis time component is registered. analysisTime = self.analysisContext.analysisTime if analysisTime is None: analysisTime = realTime if nextAnalysisTimeTriggerTime is None or analysisTime >= nextAnalysisTimeTriggerTime: nextTriggerOffset = 3600 for component in analysisTimeTriggeredComponents: nextTriggerRequest = component.doTimer(realTime) nextTriggerOffset = min(nextTriggerOffset, nextTriggerRequest) nextAnalysisTimeTriggerTime = analysisTime+nextTriggerOffset # Analysis loop is only left on shutdown. Try to persist everything # and leave. PersistencyUtil.persistAll() return delayedReturnStatus def handleMasterControlSocketReceive(self): """Receive information from the parent process via the master control socket. This method may only be invoked when receiving is guaranteed to be nonblocking and to return data.""" # We cannot fail with None here as the socket was in the readList. (receivedFd, receivedTypeInfo, annotationData) = \ SecureOSFunctions.receiveAnnotedFileDescriptor(self.masterControlSocket) if receivedTypeInfo == b'logstream': repositioningData = self.repositioningDataDict.get(annotationData, None) if repositioningData != None: del self.repositioningDataDict[annotationData] resource = None if annotationData.startswith(b'file://'): from aminer.input.LogStream import FileLogDataResource resource = FileLogDataResource(annotationData, receivedFd, \ repositioningData=repositioningData) elif annotationData.startswith(b'unix://'): from aminer.input.LogStream import UnixSocketLogDataResource resource = UnixSocketLogDataResource(annotationData, receivedFd) else: raise Exception('Filedescriptor of unknown type received') # Make fd nonblocking. 
fdFlags = fcntl.fcntl(resource.getFileDescriptor(), fcntl.F_GETFL) fcntl.fcntl(resource.getFileDescriptor(), fcntl.F_SETFL, fdFlags|os.O_NONBLOCK) logStream = self.logStreamsByName.get(resource.getResourceName()) if logStream is None: streamAtomizer = self.analysisContext.atomizerFactory.getAtomizerForResource( resource.getResourceName()) logStream = LogStream(resource, streamAtomizer) self.trackedFdsDict[resource.getFileDescriptor()] = logStream self.logStreamsByName[resource.getResourceName()] = logStream else: logStream.addNextResource(resource) elif receivedTypeInfo == b'remotecontrol': if self.remoteControlSocket != None: raise Exception('Received another remote control ' \ 'socket: multiple remote control not (yet?) supported.') self.remoteControlSocket = socket.fromfd( receivedFd, socket.AF_UNIX, socket.SOCK_STREAM, 0) os.close(receivedFd) self.trackedFdsDict[self.remoteControlSocket.fileno()] = \ self.remoteControlSocket else: raise Exception('Unhandled type info on received fd: %s' % ( repr(receivedTypeInfo))) def getTimeTriggerClass(self): """Get the trigger class this component can be registered for. See AnalysisContext class for different trigger classes available.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """This method is called to perform trigger actions and to determine the time for next invocation. The caller may decide to invoke this method earlier than requested during the previous call. Classes implementing this method have to handle such cases. Each class should try to limit the time spent in this method as it might delay trigger signals to other components. For extensive compuational work or IO, a separate thread should be used. @param triggerTime the time this trigger is invoked. This might be the current real time when invoked from real time timers or the forensic log timescale time value. @return the number of seconds when next invocation of this trigger is required.""" delta = self.nextPersistTime-triggerTime if delta <= 0: self.repositioningDataDict = {} for logStreamName, logStream in self.logStreamsByName.items(): repositioningData = logStream.getRepositioningData() if repositioningData != None: self.repositioningDataDict[logStreamName] = repositioningData PersistencyUtil.storeJson( self.persistenceFileName, self.repositioningDataDict) delta = 600 self.nextPersistTime = triggerTime+delta return delta class AnalysisChildRemoteControlHandler(object): """This class stores information about one open remote control connection. The handler can be in 3 different states: * receive request: the control request was not completely received. The main process may use select() to wait for input data without blocking or polling. * execute: the request is complete and is currently under execution. In that mode all other aminer analysis activity is blocked. * respond: send back results from execution. All sent and received control packets have following common structure: * Total length in bytes (4 bytes): The maximal length is currently limited to 64k * Type code (4 bytes) * Data The handler processes following types: * Execute request ('EEEE'): Data is loaded as json artefact containing a list with two elements. The first one is the Python code to be executed. The second one is available within the execution namespace as 'remoteControlData'. The handler produces following requests: * Execution response ('RRRR'): The response contains a json artefact with a two element list. 
The first element is the content of 'remoteControlResponse' from the Python execution namespace. The second one is the exception message and traceback as string if an error has occured. Method naming: * do...(): Those methods perform an action consuming input or output buffer data. * may...(): Those methods return true if it would make sense to call a do...() method with the same name. * put...(): Those methods put a request on the buffers.""" maxControlPacketSize = 1 << 16 def __init__(self, controlClientSocket): self.controlClientSocket = controlClientSocket self.remoteControlFd = controlClientSocket.fileno() self.inputBuffer = b'' self.outputBuffer = b'' def mayReceive(self): """Check if this handler may receive more requests.""" return len(self.outputBuffer) == 0 def doProcess(self, analysisContext): """Process the next request, if any.""" requestData = self.doGet() if requestData is None: return requestType = requestData[4:8] if requestType == b'EEEE': execLocals = {'analysisContext': analysisContext} jsonRemoteControlResponse = None exceptionData = None try: jsonRequestData = (json.loads(requestData[8:].decode())) if (jsonRequestData is None) or \ (not isinstance(jsonRequestData, list)) or \ (len(jsonRequestData) != 2): raise Exception('Invalid request data') execLocals['remoteControlData'] = jsonRequestData[1] exec(jsonRequestData[0], {}, execLocals) jsonRemoteControlResponse = json.dumps( execLocals.get('remoteControlResponse', None)) except: exceptionData = traceback.format_exc() # This is little dirty but avoids having to pass over remoteControlResponse # dumping again. if jsonRemoteControlResponse is None: jsonRemoteControlResponse = 'null' jsonResponse = '[%s, %s]' % (json.dumps(exceptionData), jsonRemoteControlResponse) if len(jsonResponse)+8 > self.maxControlPacketSize: # Damn: the response would be larger than packet size. Fake a # secondary exception and return part of the json string included. # Binary search of size could be more efficient, knowing the maximal # size increase a string could have in json. maxIncludeSize = len(jsonResponse) minIncludeSize = 0 minIncludeResponseData = None while True: testSize = (maxIncludeSize+minIncludeSize) >> 1 if testSize == minIncludeSize: break emergencyResponseData = json.dumps(['Exception: Response ' \ 'too large\nPartial response data: %s...' % jsonResponse[:testSize], None]) if len(emergencyResponseData)+8 > self.maxControlPacketSize: maxIncludeSize = testSize-1 else: minIncludeSize = testSize minIncludeResponseData = emergencyResponseData jsonResponse = minIncludeResponseData # Now size is OK, send the data jsonResponse = jsonResponse.encode() self.outputBuffer += struct.pack("!I", len(jsonResponse)+8)+b'RRRR'+jsonResponse else: raise Exception('Invalid request type %s' % repr(requestType)) def mayGet(self): """Check if a call to doGet would make sense. @return True if the input buffer already contains a complete wellformed packet or definitely malformed one.""" if len(self.inputBuffer) < 4: return False requestLength = struct.unpack("!I", self.inputBuffer[:4])[0] return (requestLength <= len(self.inputBuffer)) or \ (requestLength >= self.maxControlPacketSize) def doGet(self): """Get the next packet from the input buffer and remove it. 
@return the packet data including the length preamble or None when request not yet complete.""" if len(self.inputBuffer) < 4: return None requestLength = struct.unpack("!I", self.inputBuffer[:4])[0] if (requestLength < 0) or (requestLength >= self.maxControlPacketSize): raise Exception('Invalid length value 0x%x in malformed ' \ 'request starting with b64:%s' % (requestLength, base64.b64encode(self.inputBuffer[:60]))) if requestLength > len(self.inputBuffer): return None requestData = self.inputBuffer[:requestLength] self.inputBuffer = self.inputBuffer[requestLength:] return requestData def doReceive(self): """Receive data from the remote side and add it to the input buffer. This method call expects to read at least one byte of data. A zero byte read indicates EOF and will cause normal handler termination when all input and output buffers are empty. Any other state or error causes handler termination before reporting the error. @return True if read was successful, false if EOF is reached without reading any data and all buffers are empty. @throws Exception when unexpected errors occured while receiving or shuting down the connection.""" data = os.read(self.remoteControlFd, 1 << 16) self.inputBuffer += data if not data: self.terminate() def doSend(self): """Send data from the output buffer to the remote side. @return True if output buffer was emptied.""" sendLength = os.write(self.remoteControlFd, self.outputBuffer) if sendLength == len(self.outputBuffer): self.outputBuffer = b'' return True self.outputBuffer = self.outputBuffer[sendLength:] return False def putRequest(self, requestType, requestData): """Add a request of given type to the send queue. @param requestType is a byte string denoting the type of the request. Currently only 'EEEE' is supported. @param requestData is a byte string denoting the content of the request.""" if not isinstance(requestType, bytes): raise Exception('Request type is not a byte string') if len(requestType) != 4: raise Exception('Request type has to be 4 bytes long') if not isinstance(requestData, bytes): raise Exception('Request data is not a byte string') if len(requestData)+8 > self.maxControlPacketSize: raise Exception('Data too large to fit into single packet') self.outputBuffer += struct.pack("!I", len(requestData)+8)+requestType+requestData def putExecuteRequest(self, remoteControlCode, remoteControlData): """Add a request to send exception data to the send queue.""" remoteControlData = json.dumps([JsonUtil.encodeObject(remoteControlCode), \ JsonUtil.encodeObject(remoteControlData)]) self.putRequest(b'EEEE', remoteControlData.encode()) def addSelectFds(self, inputSelectFdList, outputSelectFdList): """Update the file descriptor lists for selecting on read and write file descriptors.""" if self.outputBuffer: outputSelectFdList.append(self.remoteControlFd) else: inputSelectFdList.append(self.remoteControlFd) def terminate(self): """End this remote control session.""" self.controlClientSocket.close() # Avoid accidential reuse. 
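# Setting remoteControlFd to -1 also makes isDead() report this handler,
# so the main loop removes it from the tracked descriptor set.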
self.controlClientSocket = None self.remoteControlFd = -1 if self.inputBuffer or self.outputBuffer: raise Exception('Unhandled input data') def isDead(self): """Check if this remote control connection is already dead.""" return self.remoteControlFd == -1 deb-build/root/usr/lib/logdata-anomaly-miner/aminer/events/0000755000000000000000000000000013354627057022721 5ustar rootrootdeb-build/root/usr/lib/logdata-anomaly-miner/aminer/events/__init__.py0000600000000000000000000000474113352707630025022 0ustar rootroot"""This file contains interface definition useful implemented by classes in this directory and for use from code outside this directory. All classes are defined in separate files, only the namespace references are added here to simplify the code.""" class EventHandlerInterface: """This is the common interface of all components that can be notified on significant log data mining events. To avoid interference with the analysis process, the listener may only perform fast actions within the call. Longer running tasks have to be performed asynchronously.""" def receiveEvent(self, eventType, eventMessage, sortedLogLines, eventData, eventSource): """Receive information about a detected event. @param eventType is a string with the event type class this event belongs to. This information can be used to interpret type-specific eventData objects. Together with the eventMessage and sortedLogLines, this can be used to create generic log messages. @param sortedLogLines sorted list of log lines that were considered when generating the event, as far as available to the time of the event. The list has to contain at least one line. @param eventData type-specific event data object, should not be used unless listener really knows about the eventType. @param eventSource reference to detector generating the event""" raise Exception('Interface method called') class EventSourceInterface: """This is the common interface of all event sources. Component not implementing this interface may still emit events without support for callbacks.""" def whitelistEvent(self, eventType, sortedLogLines, eventData, whitelistingData): """Whitelist an event generated by this source using the information emitted when generating the event. @return a message with information about whitelisting @throws NotImplementedError if this source does not support whitelisting per se @throws Exception when whitelisting of this special event using given whitelistingData was not possible.""" raise Exception('Interface method called') # Add also the namespace references to classes defined in this # directory. from aminer.events.DefaultMailNotificationEventHandler import DefaultMailNotificationEventHandler from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler from aminer.events.SyslogWriterEventHandler import SyslogWriterEventHandler from aminer.events.Utils import VolatileLogarithmicBackoffEventHistory deb-build/root/usr/lib/logdata-anomaly-miner/aminer/events/Utils.py0000600000000000000000000000166313352707167024370 0ustar rootroot"""This module defines a handler for storing event history.""" from aminer.events import EventHandlerInterface from aminer.util import LogarithmicBackoffHistory class VolatileLogarithmicBackoffEventHistory(EventHandlerInterface, LogarithmicBackoffHistory): """This class is a volatile filter to keep a history of received events, e.g. 
for analysis by other components or for external access via remote control interface.""" def __init__(self, maxItems): """Initialize the history component.""" LogarithmicBackoffHistory.__init__(self, maxItems) self.eventId = 0 def receiveEvent(self, eventType, eventMessage, sortedLogLines, eventData, eventSource): """Receive information about a detected event and store all related data as tuple to the history log.""" self.addObject((self.eventId, eventType, eventMessage, sortedLogLines, eventData, eventSource)) self.eventId += 1 return True deb-build/root/usr/lib/logdata-anomaly-miner/aminer/events/DefaultMailNotificationEventHandler.py0000600000000000000000000001646213354626636032334 0ustar rootroot"""This module defines the event handler for reporting via emails.""" import email import os import subprocess import sys import time from aminer.AnalysisChild import AnalysisContext from aminer.util import TimeTriggeredComponentInterface from aminer.events import EventHandlerInterface from aminer.parsing import ParserMatch class DefaultMailNotificationEventHandler(EventHandlerInterface, TimeTriggeredComponentInterface): """This class implements an event record listener, that will pool received events, reduce the amount of events below the maximum number allowed per timeframe, create text representation of received events and send them via "sendmail" transport.""" CONFIG_KEY_MAIL_TARGET_ADDRESS = 'MailAlerting.TargetAddress' CONFIG_KEY_MAIL_FROM_ADDRESS = 'MailAlerting.FromAddress' CONFIG_KEY_MAIL_SUBJECT_PREFIX = 'MailAlerting.SubjectPrefix' CONFIG_KEY_MAIL_ALERT_GRACE_TIME = 'MailAlerting.AlertGraceTime' CONFIG_KEY_EVENT_COLLECT_TIME = 'MailAlerting.EventCollectTime' CONFIG_KEY_ALERT_MIN_GAP = 'MailAlerting.MinAlertGap' CONFIG_KEY_ALERT_MAX_GAP = 'MailAlerting.MaxAlertGap' CONFIG_KEY_ALERT_MAX_EVENTS_PER_MESSAGE = 'MailAlerting.MaxEventsPerMessage' def __init__(self, aminerConfig): self.recipientAddress = aminerConfig.configProperties.get( DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_TARGET_ADDRESS) if self.recipientAddress is None: raise Exception('Cannot create e-mail notification listener without target address') self.senderAddress = aminerConfig.configProperties.get( DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_FROM_ADDRESS) self.subjectPrefix = aminerConfig.configProperties.get( DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_SUBJECT_PREFIX, 'AMiner Alerts:') self.alertGraceTimeEnd = aminerConfig.configProperties.get( DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_ALERT_GRACE_TIME, 0) self.eventCollectTime = aminerConfig.configProperties.get( DefaultMailNotificationEventHandler.CONFIG_KEY_EVENT_COLLECT_TIME, 10) self.minAlertGap = aminerConfig.configProperties.get( DefaultMailNotificationEventHandler.CONFIG_KEY_ALERT_MIN_GAP, 600) self.maxAlertGap = aminerConfig.configProperties.get( DefaultMailNotificationEventHandler.CONFIG_KEY_ALERT_MAX_GAP, 600) self.maxEventsPerMessage = aminerConfig.configProperties.get( DefaultMailNotificationEventHandler.CONFIG_KEY_ALERT_MAX_EVENTS_PER_MESSAGE, 1000) if self.alertGraceTimeEnd > 0: self.alertGraceTimeEnd += time.time() self.eventsCollected = 0 self.eventCollectionStartTime = 0 self.lastAlertTime = 0 self.nextAlertTime = 0 self.currentAlertGap = self.minAlertGap self.currentMessage = '' # Locate the sendmail binary immediately at startup to avoid delayed # errors due to misconfiguration. 
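# The sendmail path is fixed and checked with os.path.exists() right here,
# so a missing sendmail installation is reported at startup instead of
# when the first alert would have to be delivered.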
self.sendmailBinaryPath = '/usr/sbin/sendmail' if not os.path.exists(self.sendmailBinaryPath): raise Exception('sendmail binary not found') self.runningSendmailProcesses = [] def receiveEvent(self, eventType, eventMessage, sortedLogLines, eventData, eventSource): """Receive information about a detected event.""" if self.alertGraceTimeEnd != 0: if self.alertGraceTimeEnd >= time.time(): return self.alertGraceTimeEnd = 0 # Avoid too many calls to the operating system time() currentTime = time.time() if self.eventsCollected < self.maxEventsPerMessage: if self.eventsCollected == 0: self.eventCollectionStartTime = currentTime self.eventsCollected += 1 self.currentMessage += '%s (%d lines)\n' % (eventMessage, len(sortedLogLines)) for line in sortedLogLines: self.currentMessage += ' '+repr(line)+'\n' if eventData is not None: if isinstance(eventData, ParserMatch): self.currentMessage += ' '+eventData.getMatchElement().annotateMatch('')+'\n' else: self.currentMessage += ' '+repr(eventData)+'\n' if self.nextAlertTime == 0: if self.lastAlertTime != 0: # This is the first event received after sending of a previous # notification. If the currentAlertGap has not elapsed, increase # the gap immediately. self.nextAlertTime = self.lastAlertTime+self.currentAlertGap if self.nextAlertTime < currentTime: # We are already out of the required gap. self.currentAlertGap = self.minAlertGap self.lastAlertTime = 0 self.nextAlertTime = currentTime+self.eventCollectTime else: # Increase the gap self.currentAlertGap *= 1.5 if self.currentAlertGap > self.maxAlertGap: self.currentAlertGap = self.maxAlertGap else: # No relevant last alert time recorded, just use default. self.nextAlertTime = currentTime+self.eventCollectTime if (self.nextAlertTime != 0) and (currentTime >= self.nextAlertTime): self.sendNotification(currentTime) return def getTimeTriggerClass(self): """Get the trigger class this component can be registered for. See AnalysisContext class for different trigger classes available.""" return AnalysisContext.TIME_TRIGGER_CLASS_REALTIME def doTimer(self, triggerTime): """Check exit status of previous mail sending procedures and check if alerts should be sent.""" # Cleanup old sendmail processes. if self.runningSendmailProcesses: runningProcesses = [] for process in self.runningSendmailProcesses: process.poll() if process.returncode is None: runningProcesses.append(process) continue if process.returncode != 0: print('WARNING: Sending mail terminated with error %d' % process.returncode, \ file=sys.stderr) self.runningSendmailProcesses = runningProcesses if (self.nextAlertTime != 0) and (triggerTime >= self.nextAlertTime): self.sendNotification(triggerTime) return 10 def sendNotification(self, triggerTime): """Really send out the message.""" if self.eventsCollected == 0: return # Write whole message to file to allow sendmail send it asynchronously. messageTmpFile = os.tmpfile() message = email.mime.Text.MIMEText(self.currentMessage) subjectText = '%s Collected Events' % self.subjectPrefix if self.lastAlertTime != 0: subjectText += ' in the last %d seconds' % (time-self.lastAlertTime) message['Subject'] = subjectText if self.senderAddress is not None: message['From'] = self.senderAddress message['To'] = self.recipientAddress messageTmpFile.write(message.as_string()) # Rewind before handling over the fd to sendmail. messageTmpFile.seek(0) sendmailArgs = ['sendmail'] if self.senderAddress is not None: sendmailArgs += ['-f', self.senderAddress] sendmailArgs.append(self.recipientAddress) # Start the sendmail process. 
Use close_fds to avoid leaking of # any open file descriptors to the new client. process = subprocess.Popen(sendmailArgs, executable=self.sendmailBinaryPath, \ stdin=messageTmpFile, close_fds=True) # Just append the process to the list of running processes. It # will remain in zombie state until next invocation of list cleanup. self.runningSendmailProcesses.append(process) messageTmpFile.close() self.lastAlertTime = time self.eventsCollected = 0 self.currentMessage = '' self.nextAlertTime = 0 deb-build/root/usr/lib/logdata-anomaly-miner/aminer/events/SyslogWriterEventHandler.py0000600000000000000000000000356513346664620030247 0ustar rootroot"""This module defines an event handler that prints data to a local syslog instance.""" import io import os import syslog from aminer.events import EventHandlerInterface from aminer.events import StreamPrinterEventHandler class SyslogWriterEventHandler(EventHandlerInterface): """This class implements an event record listener to forward events to the local syslog instance. CAVEAT: USE THIS AT YOUR OWN RISK: by creating aminer/syslog log data processing loops, you will flood your syslog and probably fill up your disks.""" def __init__(self, aminerConfig, instanceName='aminer'): self.instanceName = instanceName syslog.openlog('%s[%d]' % (self.instanceName, os.getpid()), \ syslog.LOG_INFO, syslog.LOG_DAEMON) syslog.syslog(syslog.LOG_INFO, 'Syslog logger initialized') self.bufferStream = io.BytesIO() self.eventWriter = StreamPrinterEventHandler.StreamPrinterEventHandler( None, self.bufferStream) self.eventId = 0 def receiveEvent(self, eventType, eventMessage, sortedLogLines, eventData, eventSource): """Receive information about a detected even and forward it to syslog.""" self.bufferStream.seek(0) self.bufferStream.truncate(0) self.eventWriter.receiveEvent(eventType, eventMessage, sortedLogLines, \ eventData, eventSource) eventData = self.bufferStream.getvalue() currentEventId = self.eventId self.eventId += 1 serial = 0 for dataLine in eventData.strip().split('\n'): # Python syslog is very ugly if lines are too long, so break them # down. 
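# Each chunk is limited to 800 bytes and prefixed with the event id plus a
# running serial number ("[id-serial]"), so multi-record events can be
# reassembled from syslog later on.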
while dataLine: message = None if serial == 0: message = '[%d] %s' % (currentEventId, dataLine[:800]) else: message = '[%d-%d] %s' % (currentEventId, serial, dataLine[:800]) dataLine = dataLine[800:] syslog.syslog(syslog.LOG_INFO, message) serial += 1 deb-build/root/usr/lib/logdata-anomaly-miner/aminer/events/StreamPrinterEventHandler.py0000600000000000000000000000223313354627057030362 0ustar rootroot"""This module defines an event handler that prints data to a stream.""" import sys from aminer.events import EventHandlerInterface from aminer.input.LogAtom import LogAtom class StreamPrinterEventHandler(EventHandlerInterface): """This class implements an event record listener, that will just print out data about the event to a stream, by default this is stdout""" def __init__(self, aminerConfig, stream=sys.stdout): self.stream = stream def receiveEvent(self, eventType, eventMessage, sortedLogLines, eventData, eventSource): """Receive information about a detected event.""" message = '%s (%d lines)\n' % (eventMessage, len(sortedLogLines)) for line in sortedLogLines: message += ' '+repr(line)[2:-1]+'\n' if eventData is not None: if isinstance(eventData, LogAtom): message += ' [%s/%s]' % (eventData.getTimestamp(), eventData.source) if eventData.parserMatch is not None: message += ' '+eventData.parserMatch.matchElement.annotateMatch('')+'\n' else: message += ' '+repr(eventData)+'\n' print('%s' % message, file=self.stream) self.stream.flush() return deb-build/root/usr/lib/logdata-anomaly-miner/AMiner0000755000000000000000000003730313354627647021256 0ustar rootroot#!/usr/bin/python3 -BbbEIsSttW all """This is the main program of the "aminer" logfile miner tool. It does not import any local default site packages to decrease the attack surface due to manipulation of unused but available packages. CAVEAT: This process will keep running with current permissions, no matter what was specified in 'AMinerUser' and 'AMinerGroup' configuration properties. This is required to allow the AMiner parent parent process to reopen log files, which might need the elevated privileges. NOTE: This tool is developed to allow secure operation even in hostile environment, e.g. when one directory, where AMiner attempts to open logfiles is already under full control of an attacker. However it is not intended to be run as SUID-binary, this would require code changes to protect also against standard SUID attacks. Parameters: * --Config [file]: Location of configuration file, defaults to '/etc/aminer/config.py' when not set. * --RunAnalysis: This parameters is NOT intended to be used on command line when starting aminer, it will trigger execution of the unprivileged aminer background child performing the real analysis.""" import sys # As site packages are not included, define from where we need # to execute code before loading it. sys.path = sys.path[1:]+['/usr/lib/logdata-anomaly-miner', '/etc/aminer/conf-enabled'] import errno import os import re import socket import time def runAnalysisChild(aminerConfig, programName): """Runs the Analysis Child""" from aminer import AMinerConfig # Verify existance and ownership of persistence directory. 
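# The directory has to be a real directory owned by the analysis user and
# group with access mode 0700; anything else aborts startup below, because
# state read from there is trusted on the next run.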
persistanceDirName = aminerConfig.configProperties.get( AMinerConfig.KEY_PERSISTENCE_DIR, AMinerConfig.DEFAULT_PERSISTENCE_DIR) from aminer.util import SecureOSFunctions print('WARNING: SECURITY: Open should use O_PATH, but not yet available in python', \ file=sys.stderr) if isinstance(persistanceDirName, str): persistanceDirName = persistanceDirName.encode() persistenceDirFd = SecureOSFunctions.secureOpenFile( persistanceDirName, os.O_RDONLY|os.O_DIRECTORY) statResult = os.fstat(persistenceDirFd) import stat if ((not stat.S_ISDIR(statResult.st_mode)) or ((statResult.st_mode&stat.S_IRWXU) != 0o700) or (statResult.st_uid != os.getuid()) or (statResult.st_gid != os.getgid())): print('FATAL: persistence directory "%s" has ' \ 'to be owned by analysis process (uid %d!=%d, gid %d!=%d) ' \ 'and have access mode 0700 only!' % ( persistanceDirName, statResult.st_uid, os.getuid(), statResult.st_gid, os.getgid()), file=sys.stderr) sys.exit(1) print('WARNING: SECURITY: No checking for backdoor ' \ 'access via POSIX ACLs, use "getfacl" from "acl" package ' \ 'to check manually.', file=sys.stderr) os.close(persistenceDirFd) from aminer.AnalysisChild import AnalysisChild child = AnalysisChild(programName, aminerConfig) # This function call will only return on error or signal induced # normal termination. childReturnStatus = child.runAnalysis(3) if childReturnStatus == 0: sys.exit(0) print('%s: runAnalysis terminated with unexpected status %d' % ( programName, childReturnStatus), file=sys.stderr) sys.exit(1) def main(): """AMiner main function""" # Extract program name, but only when sure to contain no problematic # characters. programName = sys.argv[0].split('/')[-1] if (programName == '.') or (programName == '..') or \ (re.match('^[a-zA-Z0-9._-]+$', programName) is None): print('Invalid program name, check your execution args', file=sys.stderr) sys.exit(1) # We will not read stdin from here on, so get rid of it immediately, # thus aberrant child cannot manipulate caller's stdin using it. stdinFd = os.open('/dev/null', os.O_RDONLY) os.dup2(stdinFd, 0) os.close(stdinFd) configFileName = '/etc/aminer/config.py' runInForegroundFlag = False runAnalysisChildFlag = False argPos = 1 while argPos < len(sys.argv): argName = sys.argv[argPos] argPos += 1 if argName == '--Config': configFileName = sys.argv[argPos] argPos += 1 continue if argName == '--Foreground': runInForegroundFlag = True continue if argName == '--RunAnalysis': runAnalysisChildFlag = True continue print('Unknown parameter "%s"' % argName, file=sys.stderr) sys.exit(1) # Load the main configuration file. if not os.path.exists(configFileName): print('%s: config "%s" not (yet) available!' % (programName, configFileName), file=sys.stderr) sys.exit(1) # Minimal import to avoid loading too much within the privileged # process. from aminer import AMinerConfig aminerConfig = AMinerConfig.loadConfig(configFileName) if runAnalysisChildFlag: # Call analysis process, this function will never return. runAnalysisChild(aminerConfig, programName) # Start importing of aminer specific components after reading # of "config.py" to allow replacement of components via sys.path # from within configuration. 
from aminer.util import SecureOSFunctions from aminer.util import decodeStringAsByteString logSourcesList = aminerConfig.configProperties.get(AMinerConfig.KEY_LOG_SOURCES_LIST, None) if (logSourcesList is None) or not logSourcesList: print('%s: %s not defined' % (programName, AMinerConfig.KEY_LOG_SOURCES_LIST), file=sys.stderr) sys.exit(1) # Now create the management entries for each logfile. logDataResourceDict = {} for logResourceName in logSourcesList: # From here on logResourceName is a byte array. logResourceName = decodeStringAsByteString(logResourceName) logResource = None if logResourceName.startswith(b'file://'): from aminer.input.LogStream import FileLogDataResource logResource = FileLogDataResource(logResourceName, -1) elif logResourceName.startswith(b'unix://'): from aminer.input.LogStream import UnixSocketLogDataResource logResource = UnixSocketLogDataResource(logResourceName, -1) else: print('Unsupported schema in %s: %s' % (AMinerConfig.KEY_LOG_SOURCES_LIST, \ repr(logResourceName)), file=sys.stderr) sys.exit(1) try: logResource.open() except OSError as openOsError: if openOsError.errno == errno.EACCES: print('%s: no permission to access %s' % ( programName, repr(logResourceName)), file=sys.stderr) sys.exit(1) else: print('%s: unexpected error opening %s: %d (%s)' % (programName, \ repr(logResourceName), openOsError.errno, \ os.strerror(openOsError.errno)), file=sys.stderr) sys.exit(1) logDataResourceDict[logResourceName] = logResource childUserName = aminerConfig.configProperties.get(AMinerConfig.KEY_AMINER_USER, None) childGroupName = aminerConfig.configProperties.get(AMinerConfig.KEY_AMINER_GROUP, None) childUserId = -1 childGroupId = -1 try: if childUserName != None: from pwd import getpwnam childUserId = getpwnam(childUserName).pw_uid if childGroupName != None: from grp import getgrnam childGroupId = getgrnam(childUserName).gr_gid except: print('Failed to resolve %s or %s' % ( AMinerConfig.KEY_AMINER_USER, AMinerConfig.KEY_AMINER_GROUP), file=sys.stderr) sys.exit(1) # Create the remote control socket, if any. Do this in privileged # mode to allow binding it at arbitrary locations and support restricted # permissions of any type for current (privileged) uid. remoteControlSocketName = aminerConfig.configProperties.get( AMinerConfig.KEY_REMOTE_CONTROL_SOCKET_PATH, None) remoteControlSocket = None if remoteControlSocketName != None: if os.path.exists(remoteControlSocketName): try: os.unlink(remoteControlSocketName) except OSError: print('Failed to clean up old remote control ' \ 'socket at %s' % remoteControlSocketName, file=sys.stderr) sys.exit(1) # Create the local socket: there is no easy way to create it with # correct permissions, hence a fork is needed, setting umask, # bind the socket. It is also recomended to create the socket # in a directory having the correct permissions already. remoteControlSocket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) remoteControlSocket.setblocking(0) bindChildPid = os.fork() if bindChildPid == 0: os.umask(0o177) remoteControlSocket.bind(remoteControlSocketName) # Do not perform any cleanup, flushing of streams. Use _exit(0) to avoid # interference with fork. os._exit(0) os.waitpid(bindChildPid, 0) remoteControlSocket.listen(4) # Now have checked all we can get from the configuration in the # privileged process. Detach from the TTY when in daemon mode. if not runInForegroundFlag: childPid = 0 try: # Fork a child to make sure, we are not the process group leader already. 
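# This is the first half of the classic double fork: the intermediate
# child becomes session leader via setsid() below, and the second fork
# then ensures the final daemon cannot reacquire a controlling terminal.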
childPid = os.fork() except Exception as forkException: print('Failed to daemonize: %s' % forkException, file=sys.stderr) sys.exit(1) if childPid != 0: # This is the parent. Exit without any python cleanup. os._exit(0) # This is the child. Create a new session and become process group # leader. Here we get rid of the controlling tty. os.setsid() # Fork again to become an orphaned process not being session leader, # hence not able to get a controlling tty again. try: childPid = os.fork() except Exception as forkException: print('Failed to daemonize: %s' % forkException, file=sys.stderr) sys.exit(1) if childPid != 0: # This is the parent. Exit without any python cleanup. os._exit(0) # Move to root directory to avoid lingering in some cwd someone # else might want to unmount. os.chdir('/') # Change the umask here to clean all group/other mask bits so # that accidentially created files are not accessible by other. os.umask(0o77) # Install a signal handler catching common stop signals and relaying # it to all children for sure. childTerminationTriggeredFlag = False def gracefulShutdownHandler(_signo, _stackFrame): """This is the signal handler function to react on typical shutdown signals.""" print('%s: caught signal, shutting down' % programName, file=sys.stderr) # Just set the flag. It is likely, that child received same signal # also so avoid multiple signaling, which could interrupt the # shutdown procedure again. global childTerminationTriggeredFlag childTerminationTriggeredFlag = True import signal signal.signal(signal.SIGHUP, gracefulShutdownHandler) signal.signal(signal.SIGINT, gracefulShutdownHandler) signal.signal(signal.SIGTERM, gracefulShutdownHandler) # Now create the socket to connect the analysis child. (parentSocket, childSocket) = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM, 0) # Have it nonblocking from here on. parentSocket.setblocking(0) childSocket.setblocking(0) # Use normal fork, we should have been detached from TTY already. # Flush stderr to avoid duplication of output if both child and # parent want to write something. sys.stderr.flush() childPid = os.fork() if childPid == 0: # Relocate the child socket fd to 3 if needed if childSocket.fileno() != 3: os.dup2(childSocket.fileno(), 3) childSocket.close() # This is the child. Close all parent file descriptors, we do not need. # Perhaps this could be done more elegantly. for closeFd in range(4, 1<<16): try: os.close(closeFd) except OSError as openOsError: if openOsError.errno == errno.EBADF: continue print('%s: unexpected exception closing file ' \ 'descriptors: %s' % (programName, openOsError), file=sys.stderr) # Flush stderr before exit without any cleanup. sys.stderr.flush() os._exit(1) # Clear the supplementary groups before dropping privileges. This # makes only sense when changing the uid or gid. if os.getuid() == 0: if (((childUserId != -1) and (childUserId != os.getuid())) or ((childGroupId != -1) and (childGroupId != os.getgid()))): os.setgroups([]) # Drop privileges before executing child. setuid/gid will raise # an exception when call has failed. if childGroupId != -1: os.setgid(childGroupId) if childUserId != -1: os.setuid(childUserId) else: print('INFO: No privilege separation when started as unprivileged user', file=sys.stderr) # Now resolve the specific analysis configuration file (if any). 
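# A relative 'AnalysisConfigFile' value is resolved against the directory
# of the main configuration file; when unset, the main configuration file
# is reused for the analysis child as well.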
analysisConfigFileName = aminerConfig.configProperties.get( AMinerConfig.KEY_ANALYSIS_CONFIG_FILE, None) if analysisConfigFileName is None: analysisConfigFileName = configFileName elif not os.path.isabs(analysisConfigFileName): analysisConfigFileName = os.path.join(os.path.dirname(configFileName), analysisConfigFileName) # Now execute the very same program again, but user might have # moved or renamed it meanwhile. This would be problematic with # SUID-binaries (which we do not yet support). # Do NOT just fork but also exec to avoid child circumventing # parent's ALSR due to cloned kernel VMA. execArgs = ['AMinerChild', '--RunAnalysis', '--Config', analysisConfigFileName] os.execve(sys.argv[0], execArgs, {}) print('%s: Failed to execute child process', file=sys.stderr) sys.stderr.flush() os._exit(1) childSocket.close() # Send all log resource information currently available to child # process. for logResourceName, logResource in logDataResourceDict.items(): if (logResource != None) and (logResource.getFileDescriptor() >= 0): SecureOSFunctions.sendLogstreamDescriptor( parentSocket, logResource.getFileDescriptor(), logResourceName) logResource.close() # Send the remote control server socket, if any and close it afterwards. # It is not needed any more on parent side. if remoteControlSocket != None: SecureOSFunctions.sendAnnotatedFileDescriptor( parentSocket, remoteControlSocket.fileno(), 'remotecontrol', '') remoteControlSocket.close() exitStatus = 0 childTerminationTriggeredCount = 0 while True: if childTerminationTriggeredFlag: if childTerminationTriggeredCount == 0: time.sleep(1) elif childTerminationTriggeredCount < 5: os.kill(childPid, signal.SIGTERM) else: os.kill(0, signal.SIGKILL) childTerminationTriggeredCount += 1 (sigChildPid, sigStatus) = os.waitpid(-1, os.WNOHANG) if sigChildPid != 0: if sigChildPid == childPid: if childTerminationTriggeredFlag: # This was expected, just terminate. break print('%s: Analysis child process %d terminated ' \ 'unexpectedly with signal 0x%x' % ( programName, sigChildPid, sigStatus), file=sys.stderr) exitStatus = 1 break # So the child has been cloned, the clone has terminated. This # should not happen either. print('%s: untracked child %d terminated with ' \ 'with signal 0x%x' % (programName, sigChildPid, sigStatus), file=sys.stderr) exitStatus = 1 # Child information handled, scan for rotated logfiles or other # resources, where reopening might make sense. 
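# open(reopenFlag=True) is expected to return True only when the resource
# was replaced, e.g. after logrotate; the fresh descriptor is then
# forwarded to the analysis child and the parent-side copy closed again.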
for logResouceName, logDataResource in logDataResourceDict.items(): try: if not logDataResource.open(reopenFlag=True): continue except OSError as openOsError: if openOsError.errno == errno.EACCES: print('%s: no permission to access %s' % ( programName, logResouceName), file=sys.stderr) else: print('%s: unexpected error reopening %s: %d (%s)' % ( programName, logResouceName, openOsError.errno, os.strerror(openOsError.errno)), file=sys.stderr) exitStatus = 2 continue SecureOSFunctions.sendLogstreamDescriptor( parentSocket, logDataResource.getFileDescriptor(), logResouceName) logDataResource.close() time.sleep(1) sys.exit(exitStatus) main() deb-build/root/usr/share/0000755000000000000000000000000013352673207014311 5ustar rootrootdeb-build/root/usr/share/doc/0000755000000000000000000000000013352673207015056 5ustar rootrootdeb-build/root/usr/share/doc/aminer/0000755000000000000000000000000013354674536016341 5ustar rootrootdeb-build/root/usr/share/doc/aminer/ParsingModel.txt0000644000000000000000000000337013326562314021456 0ustar rootrootPreamble: ========= Sorry, this part of the documentation was not yet written! Check the source code documentation headers from files in directory /usr/lib/logdata-anomaly-miner/aminer/parsing. Here is a short list of the most common model elements with short description: * AnyByteDataModelElement: Match anything till end of a log-atom. * Base64StringModelElement: Parse base64 strings as binary data. * DateTimeModelElement: Simple datetime parsing using python datetime module. See also MultiLocaleDateTimeModelElement * DebugModelElement: Output debugging information while parsing a log-atom * DecimalFloatValueModelElement: parsing of float values * DecimalIntegerValueModelElement: parsing of interger values * DelimitedDataModelElement: Same as AnyByteDataModelElement but include data only up to given delimiter string. * ElementValueBranchModelElement: conditional branching due to previously parsed values. * FirstMatchModelElement: Branch the model taking the first branch matching the remaining log-atom data. * FixedDataModelElement: Match a fixed (constant) string. * FixedWordlistDataModelElement: Match one of the fixed strings from a list. * HexStringModelElement: Match a hexadecimal string. * IpAddressDataModelElement: Match an IPv4 address. * MultiLocaleDateTimeModelElement: Parse datetime elements with leap year correction, multiple locale support. * OptionalMatchModelElement: Match subelements zero or one time. * RepeatedElementDataModelElement: Match subelements a given number of times. * SequenceModelElement: Match all the subelements exactly in the given order. * VariableByteDataModelElement: Match variable length data encoded within a given alphabet. * WhiteSpaceLimitedDataModelElement: Match string till next whitespace. deb-build/root/usr/share/doc/aminer/demo/0000755000000000000000000000000013354630154017251 5ustar rootrootdeb-build/root/usr/share/doc/aminer/demo/ubuntu-syslog-config.py0000644000000000000000000002111313352721206023721 0ustar rootroot# This demo creates an analysis pipeline to parse typical Ubuntu # server logs. # # DO NOT USE THIS TO RUN PRODUCTION ANALYSIS PIPELINE AS IT CONTAINS # SETTINGS FOR TESTING, THAT MAY IMPEDE SECURITY AND PERFORMANCE! # THOSE CHANGES ARE MARKED WITH "DEMO" TO AVOID ACCIDENTAL USE! configProperties = {} # Define the list of log resources to read from: the resources # named here do not need to exist when aminer is started. This # will just result in a warning. 
However if they exist, they have # to be readable by the aminer process! Supported types are: # * file://[path]: Read data from file, reopen it after rollover # * unix://[path]: Open the path as UNIX local socket for reading # DEMO: FORBIDDEN RELATIVE PATH! configProperties['LogResourceList'] = ['file://test.log'] # Define the uid/gid of the process that runs the calculation # after opening the log files: # DEMO: PRIVILEGE SEPARATION DISABLED! # configProperties['AMinerUser'] = 'aminer' # configProperties['AMinerGroup'] = 'aminer' # Define the path, where aminer will listen for incoming remote # control connections. When missing, no remote control socket # will be created. # configProperties['RemoteControlSocket'] = '/var/run/aminer-remote.socket' # Read the analyis from this file. That part of configuration # is separated from the main configuration so that it can be loaded # only within the analysis child. Non-absolute path names are # interpreted relatively to the main configuration file (this # file). Defaults to "analysis.py". # configProperties['AnalysisConfigFile'] = 'analysis.py' # Read and store information to be used between multiple invocations # of AMiner in this directory. The directory must only be accessible # to the 'AMinerUser' but not group/world readable. On violation, # AMiner will refuse to start. When undefined, '/var/lib/aminer' # is used. # DEMO: FORBIDDEN RELATIVE PATH! configProperties['Core.PersistenceDir'] = 'aminer' # Define a target e-mail address to send alerts to. When undefined, # no e-mail notification hooks are added. # configProperties['MailAlerting.TargetAddress'] = 'root@localhost' # Sender address of e-mail alerts. When undefined, "sendmail" # implementation on host will decide, which sender address should # be used. # configProperties['MailAlerting.FromAddress'] = 'root@localhost' # Define, which text should be prepended to the standard aminer # subject. Defaults to "AMiner Alerts:" # configProperties['MailAlerting.SubjectPrefix'] = 'AMiner Alerts:' # Define a grace time after startup before aminer will react to # an event and send the first alert e-mail. Defaults to 0 (any # event can immediately trigger alerting). # configProperties['MailAlerting.AlertGraceTime'] = 0 # Define how many seconds to wait after a first event triggered # the alerting procedure before really sending out the e-mail. # In that timespan, events are collected and will be sent all # using a single e-mail. Defaults to 10 seconds. # configProperties['MailAlerting.EventCollectTime'] = 10 # Define the minimum time between two alert e-mails in seconds # to avoid spamming. All events during this timespan are collected # and sent out with the next report. Defaults to 600 seconds. # configProperties['MailAlerting.MinAlertGap'] = 600 # Define the maximum time between two alert e-mails in seconds. # When undefined this defaults to "MailAlerting.MinAlertGap". # Otherwise this will activate an exponential backoff to reduce # messages during permanent error states by increasing the alert # gap by 50% when more alert-worthy events were recorded while # the previous gap time was not yet elapsed. # configProperties['MailAlerting.MaxAlertGap'] = 600 # Define how many events should be included in one alert mail # at most. This defaults to 1000 # configProperties['MailAlerting.MaxEventsPerMessage'] = 1000 # DEMO: INCLUSION OF ALL AMINER ELEMENTS AND ALL PYTHON SITE PACKAGES # NOT RECOMMENDED! 
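# Exposing all site packages considerably widens the attack surface of the
# analysis child; in production, list only the directories containing the
# parsing models that are actually needed.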
import sys sys.path = sys.path+['/etc/aminer/conf-available/generic', '/usr/lib/python2.7/dist-packages'] # DEMO: DISABLE SECURE OPEN TO ALLOW RELATIVE PATH, SYMLINKS! import os def insecureDemoOpen(fileName, flags): """Perform a normal open supporting also relative path to override more strict secureOpenFile function in test environment.""" return os.open(fileName, flags|os.O_NOCTTY) from aminer.util import SecureOSFunctions SecureOSFunctions.secureOpenFile = insecureDemoOpen # Add your ruleset here: def buildAnalysisPipeline(analysisContext): """Define the function to create pipeline for parsing the log data. It has also to define an AtomizerFactory to instruct AMiner how to process incoming data streams to create log atoms from them.""" # Build the parsing model first: from aminer.parsing import FirstMatchModelElement from aminer.parsing import SequenceModelElement serviceChildren = [] import CronParsingModel serviceChildren.append(CronParsingModel.getModel()) import EximParsingModel serviceChildren.append(EximParsingModel.getModel()) import RsyslogParsingModel serviceChildren.append(RsyslogParsingModel.getModel()) import SyslogPreambleModel syslogPreambleModel = SyslogPreambleModel.getModel() parsingModel = SequenceModelElement('model', [ syslogPreambleModel, FirstMatchModelElement('services', serviceChildren)]) # Some generic imports. from aminer.analysis import AtomFilters from aminer.analysis import Rules # Create all global handler lists here and append the real handlers # later on. # Use this filter to distribute all atoms to the analysis handlers. atomFilter = AtomFilters.SubhandlerFilter(None) anomalyEventHandlers = [] # Now define the AtomizerFactory using the model. A simple line # based one is usually sufficient. from aminer.input import SimpleByteStreamLineAtomizerFactory analysisContext.atomizerFactory = SimpleByteStreamLineAtomizerFactory( parsingModel, [atomFilter], anomalyEventHandlers, defaultTimestampPath='/model/syslog/time') # Always report the unparsed lines: a part of the parsing model # seems to be missing or wrong. from aminer.input import SimpleUnparsedAtomHandler atomFilter.addHandler( SimpleUnparsedAtomHandler(anomalyEventHandlers), stopWhenHandledFlag=True) # Report new parsing model path values. Those occurr when a line # with new structural properties was parsed. from aminer.analysis import NewMatchPathDetector newMatchPathDetector = NewMatchPathDetector( analysisContext.aminerConfig, anomalyEventHandlers, autoIncludeFlag=True) analysisContext.registerComponent( newMatchPathDetector, componentName='DefaultMatchPathDetector') atomFilter.addHandler(newMatchPathDetector) # Run a whitelisting over the parsed lines. from aminer.analysis import WhitelistViolationDetector violationAction = Rules.EventGenerationMatchAction( 'Analysis.GenericViolation', 'Violation detected', anomalyEventHandlers) whitelistRules = [] # Filter out things so bad, that we do not want to accept the # risk, that a too broad whitelisting rule will accept the data # later on. whitelistRules.append(Rules.ValueMatchRule( '/model/services/cron/msgtype/exec/user', 'hacker', violationAction)) # Ignore Exim queue run start/stop messages whitelistRules.append(Rules.PathExistsMatchRule( '/model/services/exim/msg/queue/pid')) # Ignore all ntpd messages for now. whitelistRules.append(Rules.PathExistsMatchRule('/model/services/ntpd')) # Add a debugging rule in the middle to see everything not whitelisted # up to this point. 
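# With the argument False the debug rule only reports the data and does not
# itself whitelist it, so evaluation continues with the rules appended below.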
whitelistRules.append(Rules.DebugMatchRule(False)) # Ignore hourly cronjobs, but only when started at expected time # and duration is not too long. whitelistRules.append(Rules.AndMatchRule([ Rules.ValueMatchRule( '/model/services/cron/msgtype/exec/command', '( cd / && run-parts --report /etc/cron.hourly)'), Rules.ModuloTimeMatchRule('/model/syslog/time', 3600, 17*60, 17*60+5)])) atomFilter.addHandler(WhitelistViolationDetector(whitelistRules, anomalyEventHandlers)) # Include the e-mail notification handler only if the configuration # parameter was set. from aminer.events import DefaultMailNotificationEventHandler if DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_TARGET_ADDRESS in analysisContext.aminerConfig.configProperties: mailNotificationHandler = DefaultMailNotificationEventHandler(analysisContext.aminerConfig) analysisContext.registerComponent( mailNotificationHandler, componentName=None) anomalyEventHandlers.append(mailNotificationHandler) # Add stdout stream printing for debugging, tuning. from aminer.events import StreamPrinterEventHandler anomalyEventHandlers.append(StreamPrinterEventHandler(analysisContext.aminerConfig)) deb-build/root/usr/share/doc/aminer/Analysis.txt0000644000000000000000000002413713326562314020661 0ustar rootrootPreamble: ========= This document lists all analysis components, that are components that emit events to the reporting infrastructure on certain conditions. The components in the following list are annotated with short codes describing their properties to speed up search for suitable analysis component. Property codes: * (A)utoconfiguration: This component may learn from the input data and adapt itself to new inputs. * (F)iltering: This component just filters input and distributes it to other analysis components. * (H)ardwired: This component generates events by hard rules. This is the opposite to "statistical triggering". * (N)ondeterministic: This component may react differently to the same input in two runs. * (R)eporting: This component will generate analysis reports for evaluation by an analyst. Those components can be very useful in the configuration phase to understand the processed data better. * (S)tatistical triggering: This component uses statistical methods to trigger on unexpected data. Such components may miss relevant events or cause false-positives. List of components: * EnhancedNewMatchPathValueComboDetector (AH): Same as NewMatchPathValueComboDetector but also supporting value transformation and storage of extra data. * HistogramAnalysis.HistogramAnalysis (R): Create histogram reports for parsed values. * HistogramAnalysis.PathDependentHistogramAnalysis (R): Create path-dependent histogram reports. * MatchValueAverageChangeDetector (AS): Detect when average value of given parsed value changes over time. * AtomFilters.MatchValueFilter (F): Use value of parsed element to forward input data to other analyzers. * MatchValueStreamWriter (F): Forward selected input data e.g. as CSV list, to other compoments via stream, e.g. to perform analysis in another tool. * MissingMatchPathValueDetector (AH): Detect when values for a given path are not received for a longer timespan, e.g. a host, service or address stopped sending/reporting. * MissingMatchPathListValueDetector (AH): Like MissingMatchPathValueDetector but looking on more than one match path for key extraction. * NewMatchPathDetector (AH): Generate events when new parser pathes are found. 
* NewMatchPathValueComboDetector (AH): Same as NewMatchPathValueDetector but considers combination of values for list of data pathes, e.g. source IP, destination IP, destination port for link analysis. * NewMatchPathValueDetector (AH): Generate events when new parsed values are observed for a given path, e.g. new MAC addresses, user names, ... * TimeCorrelationDetector (ANR): Try to detect time correlations and report them. * TimeCorrelationViolationDetector.TimeCorrelationViolationDetector (H): Detect changes in time correlation on a given ruleset. * TimestampCorrectionFilters.SimpleMonotonicTimestampAdjust (F): Adjust decreasing timestamp of new records to the maximum observed so far to ensure monotony for other analysis components. * TimestampsUnsortedDetector.TimestampsUnsortedDetector (HR): This detector is useful to to detect algorithm malfunction or configuration errors, e.g. invalid timezone configuration. * WhitelistViolationDetector (FH): Check all inputs using ruleset and create events, forward input to other components. HistogramAnalysis.HistogramAnalysis: ==================================== This component performs a histogram analysis on one or more input properties. The properties are parsed values denoted by their parsing path. Those values are then handed over to the selected "binning function", that calculates the histogram bin. * Binning: Binning can be done using one of the predefined binning functions or by creating own subclasses from "HistogramAnalysis.BinDefinition". * LinearNumericBinDefinition: Binning function working on numeric values and sorting them into bins of same size. * ModuloTimeBinDefinition: Binning function working on parsed datetime values but applying a modulo function to them. This is useful for analysis of periodic activities. * Example: The following example creates a HistogramAnalysis using only the property "/model/line/time", binned on per-hour basis and sending a report every week: from aminer.analysis import HistogramAnalysis # Use a time-modulo binning function moduloTimeBinDefinition=HistogramAnalysis.ModuloTimeBinDefinition( 3600*24, # Modulo values in seconds (1 day) 3600, # Division factor to get down to reporting unit (1h) 0, # Start of lowest bin 1, # Size of bin in reporting units 24, # Number of bins False) # Disable outlier bins, not possible with time modulo histogramAnalysis=HistogramAnalysis.HistogramAnalysis( aminerConfig, [('/model/line/time', moduloTimeBinDefinition)], 3600*24*7, # Reporting interval (weekly) reportEventHandlers, # Send report to those handlers resetAfterReportFlag=True) # Zero counters after sending of report # Send the appropriate input feed to the component atomFilter.addHandler(histogramAnalysis) HistogramAnalysis.PathDependentHistogramAnalysis: ================================================= This component creates a histogram for only a single input property, e.g. an IP address, but for each group of correlated match pathes. Assume there two pathes that include the input property but they separate after the property was found on the path. This might be for example the client IP address in ssh log atoms, where the parsing path may split depending if this was a log atom for a successful login, logout or some error. This analysis component will then create separate histograms, one for the path common to all atoms and one for each disjunct part of the subpathes found. The component uses the same binning functions as the standard HistogramAnalysis.HistogramAnalysis, see documentation there. 
* Example: # Perform path-dependent histogram analysis: from aminer.analysis import HistogramAnalysis # Use a time-modulo binning function moduloTimeBinDefinition=HistogramAnalysis.ModuloTimeBinDefinition( 3600*24, # Modulo values in seconds (1 day) 3600, # Division factor to get down to reporting unit (1h) 0, # Start of lowest bin 1, # Size of bin in reporting units 24, # Number of bins False) # Disable outlier bins, not possible with time modulo pathDependentHistogramAnalysis=HistogramAnalysis.PathDependentHistogramAnalysis( aminerConfig, '/model/line/time', # The value properties to check moduloTimeBinDefinition, 3600*24*7, # Reporting interval (weekly) reportEventHandlers, # Send report to those handlers resetAfterReportFlag=True) # Zero counters after sending of report # Send the appropriate input feed to the component atomFilter.addHandler(pathDependentHistogramAnalysis) WhitelistViolationDetector: =========================== This detector manages a list of whitelist rules to filter parsed atoms. All atoms not hit by any whitelist rule will cause events to be generated. When an atom is matched by a rule, it will be regarded as whitelisted by default but there is also an option to call user-defined functions on a matching rule via MatchAction elements, e.g. to forward the atom to another analyzer in one pass. Predefined actions are: * EventGenerationMatchAction: Generate events, when a rule matches, e.g. to report interesting matches, violations or for debugging. * AtomFilterMatchAction: Filter out the parsed atoms on match and forward it to other handlers, e.g. analysis components. * Rules: The ruleset of this detector is created from classes defined in aminer.analysis.Rules. See below for short list of supported rules or source for full documentation: * AndMatchRule: match only if all subrules match * DebugMatchRule: print debugging text when matching * DebugHistoryMatchRule: keep history of matched LogAtoms * IPv4InRFC1918MatchRule: match IPs in private networks * ModuloTimeMatchRule: match cyclic time values, e.g. nighttime * NegationMatchRule: match only if other rule did not * OrMatchRule: match if any subrule matches * ParallelMatchRule: match if any subrule matches but do not stop at first successful match * PathExistsMatchRule: match if parsed data contains given path * StringRegexMatchRule: match if parsed data string matches given regular expression. If applicable, Value[X]MatchRule should be used instead. * ValueDependentDelegatedMatchRule: select match rules according to values from parsed data * ValueDependentModuloTimeMatchRule: like ModuloTimeMatchRule but select limits according to values from parsed data * ValueListMatchRule: match if value is in given lookup list * ValueMatchRule: match if parsed data contains specific value * ValueRangeMatchRule: match if parsed data value is within given range * Example: # Run a whitelisting over the parsed lines. from aminer.analysis import Rules from aminer.analysis import WhitelistViolationDetector violationAction=Rules.EventGenerationMatchAction('Analysis.GenericViolation', 'Violation detected', anomalyEventHandlers) whitelistRules=[] # Filter out things so bad, that we do not want to accept the # risk, that a too broad whitelisting rule will accept the data # later on. 
whitelistRules.append(Rules.ValueMatchRule('/model/services/cron/msgtype/exec/user', 'hacker', violationAction)) # Ignore Exim queue run start/stop messages whitelistRules.append(Rules.PathExistsMatchRule('/model/services/exim/msg/queue/pid')) # Add a debugging rule in the middle to see everything not whitelisted # up to this point. whitelistRules.append(Rules.DebugMatchRule(False)) # Ignore hourly cronjobs, but only when started at expected time # and duration is not too long. whitelistRules.append(Rules.AndMatchRule([ Rules.ValueMatchRule('/model/services/cron/msgtype/exec/command', '( cd / && run-parts --report /etc/cron.hourly)'), Rules.ModuloTimeMatchRule('/model/syslog/time', 3600, 17*60, 17*60+5)])) atomFilter.addHandler(WhitelistViolationDetector(whitelistRules, anomalyEventHandlers)) deb-build/root/usr/share/doc/aminer/Changelog.txt0000644000000000000000000001461213354674536020775 0ustar rootrootV1.0.0 (2018-10-02): ==================== * Changes: * Ported code to Python 3 * Code cleanup using pylint * Added util/JsonUtil.py to encode byte strings for storing them as json objects * Added docs/development-procedures.txt which documents development procedures * Features: * New MissingMatchPathListValueDetector to detect stream interuption * Added parsing support for kernel IP layer martian package messages * Systemd parsing of apt invocation messages. * Bugfixes: * AnalysisChild: handle remote control client connection errors correctly * Various bugfixes V0.0.8 (2017-05-30): ==================== * New features: * Base64StringModelElement for parsing of base64 strings * DecimalFloatValueModelElement for parsing of floats * AnalysisChild full asynchronous remote control connection handling * MissingMatchPathValueDetector realerting support added * StringRegexMatchRule for matching parsed data using regex * EnhancedNewMatchPathValueComboDetector added for more complex value combination detection providing transformation, annotation, features ... * Changes: * Various bugfixes * Code cleanup using pylint V0.0.7 (2017-01-09): ==================== * New features: * Datetime parsing reimplemented in DateTimeModelElement to fix various shortcomings of strptime in Python and libc. * ElementValueBranchModelElement added to support conditional branching. * DebugHistoryMatchRule added: keep history of matched LogAtoms. * Unix socket input support: improved logdata reading now also supporting connects to UNIX server sockets for reading. * Changes: * Restructured AMinerUtils and utils package. V0.0.6 (2016-11-04): ==================== * New features: * Added "LogAtom" and "AtomHandlerInterface" to pass data from IO-layer to upper layers consistently. * Added TimestampsUnsortedDetector.py to fail (exit) when data is not properly sorted. * Added SimpleMultisourceAtomSync.py to merge LogAtoms read from parallel streams, e.g. from multiple logfiles. * Bugfixes: * Logic flaw in logfile rollover handling, selecting on bad fd. * Unassigned, reused variable caused same logfile to be opened more than once. V0.0.5 (2016-10-11): ==================== * New features: * Added helper class for parser matching process debugging. * Added interface to ease detector state updates after triggering an event. * Added minimal model for parsing Tomcat start/stop syslog messages. * Added support for logfile reading continuation after shutdown, alternative lowlevel data stream splitting tools (atomizers). * Bugfixes: * Fixed file descriptor leak in PersistencyUtil.py on store. 
V0.0.4 (2016-08-26): ==================== * New features: * NewMatchPathValueComboDetector: Detect new combination of values for list of data pathes, e.g. for link analysis. * ParsedAtomFilterMatchAction: Forward data to specific handlers when rules match. * VolatileLogarithmicBackoffEventHistory: Added unique EventId * Pass event source reference to event handlers to allow query/change of source parameters when handling the event. * Generic logdata parsing model improvements * Added new rule types: ValueDependentDelegatedMatchRule and ValueDependentModuloTimeMatchRule * Bugfixes: * Packaging: unowned directory after purge, see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=832347 * DefaultMailNotificationEventHandler: cleanup of old sendmail process instances not working * EventGenerationMatchAction: wrong method name, class field access V0.0.3 (2016-07-21): ==================== * Backport of changes from Debian ITP process review, see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=813096 * Handle rollover from one logfile to the next more graceful. * New features: * Added support for multiple parallel timescales in component trigger timing, e.g. real-time and forensic-time. * Added MissingMatchPathValueDetector to detect when data values were not reported for an extended period of time. * Added VolatileLogarithmicBackoffEventHistory to keep data of limited number of events in memory for automated processing. * AMinerRemoteControl: Added "--ExecFile" parameter to execute commands from file, "--StringResponse" for direct output. * Bugfixes: * debian/postinst: disable service start when not already enabled using standard methods debian/rules and debian/postinst. V0.0.2 (2016-05-12): ==================== * Added remote control support, command line tool for sending commands. * Added AnalysisContext as registry for analysis components and access point for remote control. * Added component to write events to syslog (read component documentation for CAVEATs) * Package structure fixes from Debian mentoring procedure * Bugfixes: * ModuloTimeMatchRule: time object path was ignored on error * Indent-formatting repaired, single-line mode added V0.0.1 (2016-04-07): ==================== * Bugfixes * Minor feature improvements (options) * New features: * MultiLocaleDateTimeModelElement: decode timestamps not encoded using local system language or byte encoding. * ParsedAtomFilters: filter received atoms and filter to other handlers * TimestampCorrectionFilters: Correct timestamps from broken sources to have monotonic time. V0.0.0 (2016-03-04): ==================== Initial Release Features: * Common parsing model elements available: fixed strings, numbers, IP addresses, date-time fields, delimited fields, fixed alphabet fields, ... * Common parsing model structural elements: sequences, branches, repeated elements, optional elements. * Stream and file reading, splitting into lines. * Operating system integration: privileged parent process forwarding file descriptors to analysis child. * Reopening of log files using open/fstat loop. * Functionality for state persistence handling between restarts. * Analysis components: * NewMatchPathDetector: generate events when new match path is detected. * HistogramAnalysis: generate complete and path-dependent histograms for given properties. * MatchValueQueueSplitter: split input from e.g. one parser and forward it to different analyzers depending on match values. 
* WhitelistViolationDetector: ignore log data that is whitelisted at least by single rule (logcheck equivalent behaviour) * TimeCorrelationViolationDetector: check if usually correlated loglines are really found both. deb-build/root/usr/share/doc/aminer/Readme.txt0000644000000000000000000002762513354643232020300 0ustar rootrootPreamble: ========= This document is the starting point to the user documentation. For developer documentation start with "Design.txt". The documentation attempts to strike out especially useful information with following keywords. CAVEAT: Keep that in mind using AMiner PRODUCTION: This is a hint for production use Installation Requirements: ========================== * Python language compatibility: Code is compatible with Python 3, software was tested on: * Ubuntu Xenial 1604: Released 2016-04-21, Python 3.6.5 * Ubuntu Bionic 1804: Released 2018-09-13, Python 3.6.5 * System requirements: Requirements are depending on AMiner configuration. * Simple Logcheck-like operation: As opposed to logcheck, AMiner runs in stream-processing mode. For simple filtering 32MB RAM are sufficient. * Advanced operation: See documentation of analysis components configured to learn about memory requirements. Concepts: ========= * Alert: Alerts are generated when severe or numerous events where detected. * Event: Events are triggered when unexpected log atoms are received or expected log atoms are missing. * Log Atom: Log-data, that is always written as whole block by atomic logging operation. This can be a single syslog line or an XML-logger stanza. * Parsing Model: Tree-like model describing the structure of log atoms on that system. The model could be seen as a single, very large regular expression modelling the log data. Thus computational complexity for log atom parsing is more like O(log(n)) than the O(n) when handling data with separate regular expressions. * Detector Rules: Those rules select parsed log atoms according to their properties and facilitate interaction with the appropriate detectors. The rules for also tree-like structures to allow evaluation of the rules "A&B" and "A&C" with only a single check of property "A". Features: ========= This is a brief list of currently supported features. To learn more about each feature, read "Design.txt". * Daemon mode and foreground operation * Privilege separation between logfile reading and analysis process * Fast tree-shaped parsing model to extract features from log atoms. Currently supported data elements: * fixed strings * decimal/hex numbers * IP addresses * date and time fields * delimited fields * fixed alphabet fields Current structural elements: * sequences * branches * repeated elements * optional elements * Very flexible pipeline design to: * analyse parsed data (see analyzers below) * split the stream, e.g. per host or per source zone * Analysis components for event generation (see Analysis.txt): * Events on unknown (unparseable) log atoms * Events on log atoms with new structure, e.g. 
log atoms of type not observed before on this system * Detection of new items in lines (new MACs, IPs, hostnames, user names) * Histogram reports for given properties in any path or per path * Whitelisting of parsed atoms using complex rulesets * Statistical check if last n extracted values deviate from average previously observed * Event generation due to non-correlated log atoms * Action components: * E-mail notifications Following features are partially implemented: * Persistence of analysis state between restarts Getting Started: ================ This document contains only an overview and simplified instructions for aminer use. The most accurate setup documentation including all the gory details is currently "ParanoidsInstallationGuide.txt". * Test AMiner with generic configuration: A good way to get knowing AMiner and its features is running it in foreground on prerecorded data. You can start from the quite empty template /etc/aminer/config.py.template or use one of the demo configurations, e.g. /usr/share/doc/aminer/demo/ubuntu-syslog-config.py. CAVEAT: The demo file contains settings nice for demonstration but unsuitable for production use! Use /etc/aminer/config.py.template as a starting point instead! You just can move your input file to analyze to "test.log", create a directory "aminer" and run AMiner from that working directory or you may adjust the demo configuration. gzip -cd /usr/share/doc/aminer/demo/ubuntu-syslog-config.py.gz > config.py /usr/bin/AMiner --Config config.py On the first run you may notice, that AMiner will report detection of numerous new path elements, e.g. ------------------------------------------------------------------------------- New path /model/services/cron/pid (1 lines) Feb 5 08:17:01 test.local CRON[11581]: (root) CMD ( cd / && run-parts --report /etc/cron.hourly) /model: None ('Feb 5 08:17:01 test.local CRON[11581]: (root) CMD ( cd / && run-parts --report /etc/cron.hourly)') /model/syslog: None ('Feb 5 08:17:01 test.local ') /model/syslog/time: 2015-02-05 08:17:01 ('Feb 5 08:17:01') /model/syslog/sp0: (' ') ------------------------------------------------------------------------------- Each of those reports corresponds to a structural property of the parsed log line, that was not encountered before. But as the the demo "ubuntu-syslog-config.py" is configured to report each of that occurrences only once, you will see each path in this run only once. When AMiner is terminated using [Ctrl]-C and started again, it loads information about previously encountered items from the persistence directory and will also not report again. PRODUCTION: To avoid too many false reports when deploying AMiner the first time, perform a run with "autoIncludeFlag=True" on the current logfiles of the target system to and then toggle to "False". Rule Based Checking Using Detector Rules: ========================================= AMiner comes with a pattern matching rule engine for efficient implementation of both whitelisting, blacklisting and correlation approaches. See "demo/ubuntu-syslog-config.py" for rule examples. The rules themselves can be used for whitelisting but also to hook custom actions, e.g. to handle exclusions from whitelisting, but also to forward events to correlation checking. PRODUCTION: One featrure to increase performace is the "ParallelMatchRule". While a normal "OrMatchRule" will match if any of the subrules matches and then terminate the search, the parallel rule will evaluate all "Or" branches, thus allowing parallelization of checks. 
This makes only sense, when the different branches also make use of "MatchActions" to act when one of the branches found a match. Correlation Checking: ===================== AMiner also allows whitelisting using time correlation of log atoms. With central log aggregation, this can be done across services and machines. This could be applied e.g. to following log lines: Feb 5 08:17:01 test.local CRON[11581]: (root) CMD ( cd / && run-parts --report /etc/cron.hourly) Feb 05 08:17:04 test.local CRON[11581]: pam_unix(cron:session): session closed for user root Usually an event hidden inside the logics of one machine triggers a set of other events, thus producing log-atoms as artefacts. In the example log lines, this is the expiration of the a timer within the cron daemon. This hidden event is denominated A*. It then triggers event A, the syslog entry when starting a hourly cronjob, thus producing the first artefact A. When the cron jobs is completed, pam-library is invoked (hidden event B*), causing writing of artefact B to "auth.log". As A* always causes A and B* and B* always results in B, the artefacts A and B have to occur in pairs. CAVEAT: The cause of events above does not imply, that A has to be seen before B. As A* is causing B in the end, depending on the delay A*->A and B*->B, B may be observed before A! Matching will only happen, if A and B are close enough in time. Thus a rule for the cronjobs above may report an anomaly if the cron jobs run time is abnormally long. Other examples for log lines to be correlated: * Each webserver access has to pass through a firewall before. Thus each webserver logline has to be preceeded by a firewall logline (vice versa is not true for bad internet: someone may only send single SYN-packet, thus causing firewall log but webserver will never receive a request). * Monitoring plugin logs on using SSH and then executes check command, logs out. Thus login artefact without check or logout in time is suspicious. Implementation example: ------------------------------------------------------------------------------- # Create a correlation rule: As syslog timestamps have only second # precision and B usually is some ms after A, accept correlation # only in range (0.0, 1.0) seconds. aImpliesBRule=TimeCorrelationViolationDetector.CorrelationRule( 'A implies B', 0.0, 1.0, maxArtefactsAForSingleB=1, artefactMatchParameters=None) # Build event selectors: As one selector may select an event that # is input to more than one correlation rule, accept lists as input. aClassSelector=TimeCorrelationViolationDetector.EventClassSelector( 'A-Event', [aImpliesBRule], None) bClassSelector=TimeCorrelationViolationDetector.EventClassSelector( 'B-Event', None, [aImpliesBRule]) # Hook the selectors to the detector rules tree. ... allRules.append(Rules.PathExistsMatchElement('/model/services/A', aClassSelector)) ... # Use the standard WhitelistViolationDetector but with parallel # matching to a) perform whitelisting of acceptable log lines # and b) forwarding of selected events to the correlation detectors. 
whitelistViolationDetector=WhitelistViolationDetector.WhitelistViolationDetector( [Rules.ParallelMatchRule(allRules)], anomalyEventHandlers) ------------------------------------------------------------------------------- Artefact output would be: ------------------------------------------------------------------------------- Correlation rule "A implies B" violated (1 lines) Jan 07 15:59:55 AAAAAA FAIL: "Jan 07 15:59:55 AAAAAA" (A-Event) Historic examples: "Sep 15 17:31:57 AAAAAA" (A-Event) ==> "Sep 15 17:31:57 BBBBBB" (B-Event) "Jan 29 03:34:37 AAAAAA" (A-Event) ==> "Jan 29 03:34:38 BBBBBB" (B-Event) "May 13 03:39:39 AAAAAA" (A-Event) ==> "May 13 03:39:39 BBBBBB" (B-Event) "May 13 09:09:09 AAAAAA" (A-Event) ==> "May 13 09:09:10 BBBBBB" (B-Event) "May 20 06:21:37 AAAAAA" (A-Event) ==> "May 20 06:21:37 BBBBBB" (B-Event) "May 20 14:44:56 AAAAAA" (A-Event) ==> "May 20 14:44:57 BBBBBB" (B-Event) "May 25 03:35:42 AAAAAA" (A-Event) ==> "May 25 03:35:43 BBBBBB" (B-Event) "May 30 22:43:22 AAAAAA" (A-Event) ==> "May 30 22:43:22 BBBBBB" (B-Event) "Jun 03 17:18:49 AAAAAA" (A-Event) ==> "Jun 03 17:18:49 BBBBBB" (B-Event) "Jun 04 09:56:36 AAAAAA" (A-Event) ==> "Jun 04 09:56:36 BBBBBB" (B-Event) ------------------------------------------------------------------------------- Running as a Service: ===================== The package comes with an upstart and a systemd script to run AMiner as a daemon. Those scripts are deactivated by default because: * there is no AMiner default configuration, you need to generate it before starting anyway. * CAVEAT: When AMiner is misconfigured and his diagnostic output somehow reaches the logfiles it is currently processing, a log data loop can be generated, just filling up your disks! To enable AMiner daemon, do the following: * Upstart: Enable autostart by uncommenting the "start on" stanza in "/etc/init/aminer.conf". As AMiner stdout/stderr messages would end up in "/var/log/upstart/aminer.log", there is no risk for log data loops UNLESS you include those files to be handled by AMiner or you have another type of data forwarding from the files to e.g. main syslog in place. * Systemd: Service is enabled using "systemctl enable aminer". The default AMiner service configuration will write all messages from stdout/stderr to /dev/null due to "StandardOutput=null" setting to avoid log data loops. If you are sure what you are doing, you may want to change this to "StandardOutput=journal" and deactivate journal to syslog forwarding when you need to have aminer parse the syslog data also. deb-build/root/usr/share/doc/aminer/Design.txt0000644000000000000000000002000613326562314020276 0ustar rootrootPreamble: ========= This document describes the requirement, design and implementation of AMiner. For using it, the general "Readme.txt" may suit your needs better than this document. Requirements: ============= * IO-Event triggered stream processing of messages to avoid CPU peaks and allow timely generation of alerts. * Sensible alerting model, e.g. sending of aggregated report 10sec after first anomaly, then have gracetime of 5min. When more events occurred, send another report and double grace time. * Have "environment" flags, e.g. maintenance mode to reduce messages under known procedures. Example: rsyslog should only restart during daily cronjobs, but at any time during maintenance. 
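The alerting requirement above is what the "MailAlerting.*" properties in
"config.py.template" are meant to cover. A minimal sketch, assuming the
property names from that template (the concrete values below are purely
illustrative, not recommendations):

# Excerpt for config.py / analysis.py; values are example numbers only.
configProperties = {}
configProperties['MailAlerting.TargetAddress'] = 'root@localhost'
# React to the very first event without extra startup grace time.
configProperties['MailAlerting.AlertGraceTime'] = 0
# Aggregate events for 10 seconds after the first anomaly before mailing.
configProperties['MailAlerting.EventCollectTime'] = 10
# Afterwards keep a minimum gap between alert mails (5 minutes here) ...
configProperties['MailAlerting.MinAlertGap'] = 300
# ... and let the backoff stretch that gap up to one hour during
# permanent error states.
configProperties['MailAlerting.MaxAlertGap'] = 3600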
Design: ======= * Configuration layout: The behaviour of AMiner is controlled by 3 different configuration data sources: * config.py: This configuration file is used by the privileged parent process for startup and launching of child process. To avoid parsing and loading larger amounts of configuration into a privileged process, this configuration may contain only the minimal set of parameters required by the parent process. * analysis.py: This (optional) configuration file contains the whole analysis child configuration (code). When missing those configuration parameters are also taken from the main config. * /var/lib/aminer: This directory is used for persistency of runtime data, e.g. learned patterns, statistical data, between different AMiner invocations. * Loading of python code: AMiner does not use the default dist/site-packages to load code. The rationale behind that is: * Avoid unused code to be loadable or even loaded by default: that code may only increase the attack surface or the memory footprint. * Reduce risk of side effects of unrelated updates: even when not good practices, some pyhton modules try to detect existence of other modules to adapt behaviour when available. This may cause unintended runtime changes when installing or updating completely unrelated python software. * Log file reading: Those problems have to be addressed when processing a continous stream of logging data from multiple sources: * High performance log reading conflicts with proper EOF detection: The select() call is useful to react to available data from sockets and pipes but will always include any descriptors for plain files, as they are always readable, even when at EOF. To detect availability of more data, inotify would have to be used. But while waiting, no socket change can be detected. Apart from that, unprivileged child may not access the directories containing the opened log file descriptors. * Log files may roll over: the service writing it or a helper program will move the file aside and switch to a newly created file. * Multiple file synchronization: When processing messages from two different log data sources to correlate them, care must be taken not to read newest messages only from one source and fall behind on the other source. Otherwise messages generated with quite different time stamps might be processed nearly at the same time while messages originating nearly at same timepoint might be separated. Solutions: * High performance log reading: No perfect solution possible. Therefore workaround similar to "tail -f" was choosen: Use select() on master/child communication socket also for sleeping between file descriptor read attempts. After timeout, handle the master/child communication (if any), then read each file until all of them did not supply any more data. Go to sleep again. * Roll over: Privileged process monitors if the file currently read has moved. When a move is detected, notify the child about the new file. This detection has to occur quite timely as otherwise the child process not knowing about the new file will continue processing and miss relevant correlated patterns due to reading only some of the currently relevant streams. FIXME: readlink best method? Inotify? * Roll over in child: The challenge is to completely read the old file before switching to the new one. Therefore the child relies on the notifications from the parent process to know about new files. 
When a new file is received, the old one is fstat'ed to known the maximum size of the file, then the remaining data is read before closing the old file descriptor. * Multiple file synchronization: Useful file synchronization requires basic understanding of reported timestamps which implies the need for parsing. Also timestamp correction should be performed before using the timestamp for synchronization, e.g. host clocks might drift away or logging may use wrong timezone. When processing multiple log data streams, all parsed log atoms will be reordered using the timestamp. One stream might not be read at all for some time, when an atom from that stream has timestamp larger than those from other streams. When reaching the end of input on all streams, marks on all reordering queues of unforwarded parsed log atoms are set. Everything before that mark will be forwared anyway after a configurable timespan. This should prevent bogus items from staying within the reordering queue forever due to timestamps far in future. * Input parsing: Fast input disecting is key for performant rule checks later on. Therefore the algorithm should have following properties: * Avoid passing over same data twice (as distinct regular expressions would do), instead allow a tree-like parsing structure, that will follow one parsing path for a given log-atom. * Make parsed parts quickly accessible so that rule checks can just pick out the data they need without searching the tree again. * Rule based distribution of parsed input to detectors: Implementation: =============== * AMiner: This is the privileged master process having access to logfiles. It just launches the AMinerAnalysisChild and forwards logfiles to it. * AMinerAnalysisChild: This process runs without root capablities and just reads logfiles and stores state information in /var/lib/aminer. AMinerAnalysisChild processes data in a multistage process. Each transformation step is configurable, components can be registered to receive output from one layer and create input for the next one. * aminerConfig.buildAnalysisPipeline: This function creates the pipeline for parsing the log data and hands over the list of RawAtom handlers (those who will receive new log-atoms) and a list of components needing timer interrupts. Thus the need for multithreaded operation or asynchronous timer events is eliminated. * TimeCorrelationDetector: This component attempts to perform following steps for each recieved log-atom: * Check which test rules match it. If no rule matched the data, keep it for reference when creating new rules next time. * When a match A was found, go through correlation table to check if any of the other matches has matched recently. If a recent match B had occured, update 2 counters, one assuming that A* (hidden internal event) caused B and then A, the other one that B* cause B and then A. * If maximum number of parallel check rules not reached yet, create a new random rule now using the current log-atom or the last unmatched one. * Perform correlation result accounting until at least some correlation counters reach values high enough. Otherwise discard features after some time or number of log atoms received when they did not reach sufficiently high counts: they may be unique features likely not being observed again. This detection algorithm has some weaknesses: * If match A is followed by multiple machtes of B, that will raise the correlation hypothesis for A*->A->B above the count of A. 
* For A*->A->B hypothesis, two As detected before the first B will increment count only once, the second pair is deemed non-correlated. deb-build/root/usr/share/doc/aminer/ParanoidsInstallationGuide.txt0000644000000000000000000004131613353202132024341 0ustar rootrootPreamble: ========= This document provides an installation guide to all those paranoid users, who * do not trust their systems to be in a completely sane state when starting the analysis, * do not trust the aminer analysis features without understanding (and perhaps auditing) them and * do not trust the aminer autoconfiguration features to perform correctly or want to avoid them accepting parts of the insane system state as normal. Thus the following instructions will show how to enable each small feature manually to create full blown and really paranoid realtime mining configuration for syslog data. Creating the basic service configuration: ========================================= AMiner supports splitting the configuration loaded by the parent and child process. As the parent usually has to be run with root privileges, one might want to avoid loading the full configuration here. Steps: * Create parent configuration: cd /etc/aminer cp config.py.template config.py Remove everything below configuration property "AnalysisConfigFile". Define the child configuration file: configProperties['AnalysisConfigFile'] = 'analysis.py' While not fully configured, you may want to start with an empty input file. A paranoid aminer instance will report everything not explicitely whitelisted as an error. Basically you would get your whole syslog echoed back, just with an amplification factor applied. In that case change the property "LogResourceList" to: configProperties['LogResourceList'] = ['file:///etc/aminer/test.log'] and do: touch /etc/aminer/test.log chmod 0600 /etc/aminer/test.log Enable also the remote control socket: it will be needed to adjust aminer child settings on the fly without reloading of the configuration or editing of persistency files. In default configuration, only root user can connect to the socket. The remote control commands are executed within the analysis child process usually running as a dedicated less-privileged user. configProperties['RemoteControlSocket'] = '/var/run/aminer-remote.socket' * Create child configuration: cp config.py.template analysis.py Remove everything below (but keeping) "configProperties = {}" down to (including) configuration property "AnalysisConfigFile". You may want to define the "MailAlerting.TargetAddress" parameter to receive e-mail alerts and data reports. During testing the "StreamPrinterEventHandler" is useful to get the aminer events printed to stdout. Therefore configuration line comments have to be removed: from aminer.events import StreamPrinterEventHandler anomalyEventHandlers.append(StreamPrinterEventHandler(analysisContext.aminerConfig)) * Allow python code imports: AMiner does not use the default dist/site-packages to load code. See Design.txt "Loading of python code" for explanation. By default pytz is used for timestamp handling, so add it: ln -s /usr/lib/python3/dist-packages/pytz /etc/aminer/conf-enabled * Check for configuration errors: AMiner --Foreground --Config /etc/aminer/config.py Here you will get some warnings. The rationale behind that is not to be silent about code that is not hardened to the maximum level. 
The code should be secure in the current execution environment where * semantics of kernel syscall interface did not change over time * component is used as recommended in documentation * core system services or users are not already under control of an adversery Here are short explanations for the warnings: * WARNING: SECURITY: No secure open yet due to missing openat in python! Python2.7 os module does not support "openat". Without that, Linux kernel does not provide any secury way to open a file in an untrusted directory, e.g. "/var/log", a directory owned by user "syslog". The only way using "(f)chdir/open" repeats is not really practical. But usually when your syslog user is controlled by an adversery, you will be done anyway. * WARNING: SECURITY: Open should use O_PATH, but not yet available in python Linux allos to open files and directories with "O_PATH" flag. The file descriptor can be used as a reference to the file but not for reading/writing. Thus leak of file descriptor to other process (no close before exit) or standard stdin/stdout write data corruption can be avoided. Aminer (or your specific setup) would need to have such a vulnerability in first place to let the "O_PATH" hardening become effective. * WARNING: SECURITY: No checking for backdoor access via POSIX ACLs, use "getfacl" from "acl" package to check manually. Apart from the standard file mode flags, each file or directory may also have "POSIX ACLs" attached. When not checking for them, an adversery may use them to gain access even to newly created files which is not expected when just looking at the file mode. But again, this would require, that someone has acquired access to some core system file directories, e.g. "/var/log", beforehand. * WARNING: SECURITY: unsafe unlink (unavailable unlinkat/linkat should be used, but not available in python) Same as "No secure open yet due to missing openat" from above. When up and running using a test file, you can test the aminer output adding a test line: head -n 1 /var/log/syslog >> /etc/aminer/test.log As there is no hardcoded parsing model or parsing model generator configured, you should get: Unparsed data (1 lines) Jun 19 00:10:01 somehost.local rsyslogd: [origin ... Adding the first parsing model element: ======================================= When using the first line from your syslog for testing, it should be a line from the syslog daemon startup. Hence the default parsing model might match the syslog daemon output on your system. To enable add ln -s ../conf-available/generic/RsyslogParsingModel.py /etc/aminer/conf-enabled and edit your configuration: import RsyslogParsingModel serviceChildren.append(RsyslogParsingModel.getModel()) When starting aminer again, no warning about unparsed data should be printed. If still present, the model might not match your line. * Creating or modifying a model: Unlike logfile parsing tools, e.g. logcheck and many SIEMs, aminer does not use regular expressions, that have to be applied to each log line separately. The parsing model is more like a tree, having a common trunk, e.g. the syslog preamble with timestamp and hostname, and specific service outputs being handled in model branches. See Readme.txt "Concepts" for more information. Create a model file e.g. in "conf-available/local" and link it or directly in "conf-enabled", whatever suits your production process best. 
mkdir /etc/aminer/conf-available/local cp /etc/aminer/conf-available/generic/RsyslogParsingModel.py /etc/aminer/conf-available/local Edit /etc/aminer/conf-available/local/RsyslogParsingModel.py See ParsingModel.txt for documentation on the available model elements. Especially the "DebugModelElement" can be inserted at any position in your model to see where parsing breaks when parsing failures seem unexplainable. When even parsing of the syslog preamble using SyslogPreambleModel fails, this can be adjusted to your needs also. You may have to supply a different time model element, or host name model in: /etc/aminer/conf-available/SyslogPreambleModel.py or even switch to MultiLocaleDateTimeModelElement. See source code documentation in those files, e.g. /usr/lib/logdata-anomaly-miner/aminer/parsing/DateTimeModelElement.py /usr/lib/logdata-anomaly-miner/aminer/parsing/MultiLocaleDateTimeModelElement.py and Python format string specification in https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior * Debugging a model: Being paranoid and using only own, handcrafted models, one will usually experience problems that some atom is not handled as expected. To follow the parsing model, a DebugMatchContext can be used instead of the default context. It is slower but it will capture state information about the matching process. With remote control (see below), debugging can even occur while AMiner is processing data. Remote control code for that purpose could would be: from aminer.parsing import DebugMatchContext matchContext = DebugMatchContext('Your test input here') # Working with registered components is handy... match = analysisContext.registeredComponentsByName['ParsingModel'].getMatchElement('', matchContext) remoteControlResponse = 'Result: %s, debug info %s' % (str(match), matchContext.getDebugInfo()) Without remote control change in /usr/lib/logdata-anomaly-miner/aminer/input/ByteStreamLineAtomizer.py the matchContext and add an appropriate output, e.g., matchContext = DebugMatchContext(lineData) matchElement = self.parsingModel.getMatchElement('', matchContext) print('Result: %s, debug info %s' % (str(matchElement), matchContext.getDebugInfo())) Also the import of DebugMatchContext might be required. * Contributing to the model templates: If your model configuration might be useful for others and you are willing and legally alowed to publish it under "GNU GPL v3", please consider sending the change to a maintainer or filing a bug, both available at https://launchpad.net/logdata-anomaly-miner Adding detection for new parser pathes: ======================================= Usually your systems will not emit an arbitrary large number of differently structured loglines. In normal operation with a given set of configuration options and standard software use patterns, each service only produces a small subset of log message type compared to all the messages it could theoretically create. On the other hand, service failures or attacks often result in new message types to be seen. Hence each new parser path corresponding to a new message type should cause some distrust. 
You may use the configuration from the config.py.template, but when really paranoid, following changes might be useful: * Disable autoinclusion: autoIncludeFlag = False * Register as named component: componentName = 'DefaultNewMatchPathDetector' from aminer.analysis import NewMatchPathDetector newMatchPathDetector = NewMatchPathDetector( analysisContext.aminerConfig, anomalyEventHandlers, autoIncludeFlag=False) analysisContext.registerComponent( newMatchPathDetector, componentName='DefaultNewMatchPathDetector') atomFilter.addHandler(newMatchPathDetector) With automatic inclusion enabled, the detector would complain about each new path seen exactly once and then add it to the list of accepted pathes. Setting this flag to True during configuration phase is very good to shorten the configuration time, but it will also work without that. And therefore the "componentName" comes in handy. As you may have run aminer with autoinclusion enabled already during "Adding the first parsing model element", you may want to delete all the included patterns by stopping aminer and deleting "/var/lib/aminer/NewMatchPathDetector/Default". * Using different detectors for subset of sources: When running aminer on a remote logging host, you may process messages from completely different systems. While e.g. a sudo message is completely normal on one machine, it might be completely unexpected on another one. To use different detectors for different host groups, a filter has to be installed to separate the data: * Create different NewMatchPathDetector instances with unique persistance IDs: newMatchPathDetectorA = NewMatchPathDetector( analysisContext.aminerConfig, anomalyEventHandlers, peristenceId='WwwHostsGroup', autoIncludeFlag=False) * Create the filter: from aminer.analysis import AtomFilters hostnameValueFilter = AtomFilters.MatchValueFilter( '/model/syslog/host', {'host-a': newMatchPathDetectorA, ...}, defaultNewMatchPathDetector) atomFilter.addHandler(hostnameValueFilter) In production, you may want to create all those groups in a loop, reading from a list of host/group mappings. Using the remote control interface for runtime changes: ======================================================= When really running with autoIncludeFlag=False, each unknown path would be reported over and over again. On some installations you may want to roll out the known path persistency files containing simple JSON data and load them when starting aminer. Therefore adding the new item to the persistency file is sufficient. This has to be done while aminer is NOT running, otherwise shutdown may overwrite the files. More common is to add the changes via the remote control interface e.g. * manually (for demonstration as shown below) * using custom integration code, e.g. embedded in your SIEM that is receiving the aminer events * use orchestration tools * Verify remote control is working: For manual inclusion, remote control socket has to be enabled and accessible. To have more robust configuration procedures, the detectors should be registered as named components. To verify, that remote control is working, just execute: AMinerRemoteControl --ControlSocket /var/run/aminer-remote.socket --Exec 'remoteControlResponse = analysisContext.getRegisteredComponentNames()' Remote execution response: [u'DefaultNewMatchPathDetector'] The command executed is executed inside the running aminer child process. See man page of AMinerRemoteControl for more information. 
For multiline Pyhton code, writing it to a file and using "--ExecFile" for invocation is recommended. * Modify the detector state: AMinerRemoteControl --ControlSocket /var/run/aminer-remote.socket --Data '["/model/services/rsyslog/msg/statechange/type/HUPed"]' --Exec 'for pathName in remoteControlData: analysisContext.getComponentByName("DefaultNewMatchPathDetector").knownPathSet.add(pathName)' --Exec 'analysisContext.getComponentByName("DefaultNewMatchPathDetector").doPersist()' Detect missing logs: ==================== Logdata analysis might detect anomalies within the data as long as data is available. On small scale setups, that might not be such an issue, but in larger setups, nobody might notice, that one service or even host stopped to emit log messates. The reason for that might be an error or an attack, that is left to the admin to find out. Here is an example how to detect that hosts stop sending logs and to generate alerts: from aminer.analysis import MissingMatchPathValueDetector missingMatchPathValueDetector = MissingMatchPathValueDetector( analysisContext.aminerConfig, '/model/syslog/host', anomalyEventHandlers, autoIncludeFlag=True, defaultInterval=24*3600) analysisContext.registerComponent( missingMatchPathValueDetector, componentName='DefaultMissingMatchPathValueDetector') atomFilter.addHandler(missingMatchPathValueDetector) The default interval should fit your operational procedures: * When aminer is used to detect malfunction of business critical processes not monitored via other means, the interval should be so low to react early enough to fullfil your SLAs. A busy webserver not serving (and logging) a page for 2 minutes might be of relevance. You may want to feed those alerts into your monitoring solution to have event-deduplication e.g. during maintenance downtimes. * For problems you would not start dealing with immediately, where the service is not that important, a longer timeout might be suitable. Just let aminer wait some before alerting to see if the problem vanishes without interaction. By setting the "componentName" in the code example above, you again make it easier to perform remote control actions on the running miner, e.g. * Change check interval for single value: AMinerRemoteControl --ControlSocket /var/run/aminer-remote.socket --Exec 'analysisContext.getComponentByName("DefaultMissingMatchPathValueDetector").setCheckValue("buildhost.localdomain", 12*3600)' * Remove value from monitoring: AMinerRemoteControl --ControlSocket /var/run/aminer-remote.socket --Exec 'analysisContext.getComponentByName("DefaultMissingMatchPathValueDetector").removeCheckValue("buildhost.localdomain")' * Force persistency write when your changes were that important: AMinerRemoteControl --ControlSocket /var/run/aminer-remote.socket --Exec 'analysisContext.getComponentByName("DefaultMissingMatchPathValueDetector").doPersist()' Apply whitelisting to parsed entries: ===================================== Even when a log-atom was parsed using parsing model parts already matching previous atoms, this does not mean, that this should be treated a normal situation. As opposed to SIEM solutions, that frequently apply blacklisting for event generation, the aminer rules engine can be used to whitelist elements based on combinations of parameters. For performance reasons, those rules can also be arranged in a tree-like fashion. Rules can also be used to invoke actions when it matches. Parsed data that does not match any rule will trigger an event. 
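A rough sketch of such a tree-like rule arrangement, using the rule classes
from aminer.analysis.Rules documented in "Analysis.txt" (all parser paths
below are placeholders for elements of your own parsing model, and the exact
subrules you group together depend on your whitelisting policy):

from aminer.analysis import Rules

# Placeholder paths: check the shared property '/model/services/cron'
# only once, then descend into the more specific per-message subrules.
# Atoms matching none of the whitelist rules will trigger an event.
cronWhitelistRule = Rules.AndMatchRule([
    Rules.PathExistsMatchRule('/model/services/cron'),
    Rules.OrMatchRule([
        Rules.PathExistsMatchRule('/model/services/cron/msgtype/exec'),
        Rules.PathExistsMatchRule('/model/services/cron/msgtype/pamsession')])])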
See "/usr/share/doc/aminer/demo/ubuntu-syslog-config.py" for example how the WhitelistViolationDetector is added to the configuration. deb-build/root/etc/0000755000000000000000000000000013352673207013151 5ustar rootrootdeb-build/root/etc/aminer/0000755000000000000000000000000013354630156014422 5ustar rootrootdeb-build/root/etc/aminer/config.py.template0000644000000000000000000001517613352721361020062 0ustar rootroot# This is a template for the "aminer" logfile miner tool. Copy # it to "config.py" and define your ruleset. configProperties = {} # Define the list of log resources to read from: the resources # named here do not need to exist when aminer is started. This # will just result in a warning. However if they exist, they have # to be readable by the aminer process! Supported types are: # * file://[path]: Read data from file, reopen it after rollover # * unix://[path]: Open the path as UNIX local socket for reading configProperties['LogResourceList'] = ['file:///var/log/auth.log', 'file:///var/log/syslog'] # Define the uid/gid of the process that runs the calculation # after opening the log files: configProperties['AMinerUser'] = 'aminer' configProperties['AMinerGroup'] = 'aminer' # Define the path, where aminer will listen for incoming remote # control connections. When missing, no remote control socket # will be created. # configProperties['RemoteControlSocket'] = '/var/run/aminer-remote.socket' # Read the analyis from this file. That part of configuration # is separated from the main configuration so that it can be loaded # only within the analysis child. Non-absolute path names are # interpreted relatively to the main configuration file (this # file). When empty, this configuration has to contain the configuration # for the child also. # configProperties['AnalysisConfigFile'] = 'analysis.py' # Read and store information to be used between multiple invocations # of AMiner in this directory. The directory must only be accessible # to the 'AMinerUser' but not group/world readable. On violation, # AMiner will refuse to start. When undefined, '/var/lib/aminer' # is used. # configProperties['Core.PersistenceDir'] = '/var/lib/aminer' # Define a target e-mail address to send alerts to. When undefined, # no e-mail notification hooks are added. configProperties['MailAlerting.TargetAddress'] = 'root@localhost' # Sender address of e-mail alerts. When undefined, "sendmail" # implementation on host will decide, which sender address should # be used. # configProperties['MailAlerting.FromAddress'] = 'root@localhost' # Define, which text should be prepended to the standard aminer # subject. Defaults to "AMiner Alerts:" # configProperties['MailAlerting.SubjectPrefix'] = 'AMiner Alerts:' # Define a grace time after startup before aminer will react to # an event and send the first alert e-mail. Defaults to 0 (any # event can immediately trigger alerting). # configProperties['MailAlerting.AlertGraceTime'] = 0 # Define how many seconds to wait after a first event triggered # the alerting procedure before really sending out the e-mail. # In that timespan, events are collected and will be sent all # using a single e-mail. Defaults to 10 seconds. # configProperties['MailAlerting.EventCollectTime'] = 10 # Define the minimum time between two alert e-mails in seconds # to avoid spamming. All events during this timespan are collected # and sent out with the next report. Defaults to 600 seconds. # configProperties['MailAlerting.MinAlertGap'] = 600 # Define the maximum time between two alert e-mails in seconds. 
# When undefined this defaults to "MailAlerting.MinAlertGap". # Otherwise this will activate an exponential backoff to reduce # messages during permanent error states by increasing the alert # gap by 50% when more alert-worthy events were recorded while # the previous gap time was not yet elapsed. # configProperties['MailAlerting.MaxAlertGap'] = 600 # Define how many events should be included in one alert mail # at most. This defaults to 1000 # configProperties['MailAlerting.MaxEventsPerMessage'] = 1000 # Add your ruleset here: def buildAnalysisPipeline(analysisContext): """Define the function to create pipeline for parsing the log data. It has also to define an AtomizerFactory to instruct AMiner how to process incoming data streams to create log atoms from them.""" # Build the parsing model: from aminer.parsing import FirstMatchModelElement from aminer.parsing import SequenceModelElement serviceChildren = [] # import AudispdParsingModel # serviceChildren.append(AudispdParsingModel.getModel()) # import CronParsingModel # serviceChildren.append(CronParsingModel.getModel()) # import EximParsingModel # serviceChildren.append(EximParsingModel.getModel()) # import RsyslogParsingModel # serviceChildren.append(RsyslogParsingModel.getModel()) # import SshdParsingModel # serviceChildren.append(SshdParsingModel.getModel()) # import SuSessionParsingModel # serviceChildren.append(SuSessionParsingModel.getModel()) # import UlogdParsingModel # serviceChildren.append(UlogdParsingModel.getModel()) import SyslogPreambleModel syslogPreambleModel = SyslogPreambleModel.getModel() parsingModel = SequenceModelElement('model', [ syslogPreambleModel, FirstMatchModelElement('services', serviceChildren)]) # Some generic imports. from aminer.analysis import AtomFilters # Create all global handler lists here and append the real handlers # later on. # Use this filter to distribute all atoms to the analysis handlers. atomFilter = AtomFilters.SubhandlerFilter(None) anomalyEventHandlers = [] # Now define the AtomizerFactory using the model. A simple line # based one is usually sufficient. from aminer.input import SimpleByteStreamLineAtomizerFactory analysisContext.atomizerFactory = SimpleByteStreamLineAtomizerFactory( parsingModel, [atomFilter], anomalyEventHandlers, defaultTimestampPath='/model/syslog/time') # Just report all unparsed atoms to the event handlers. from aminer.input import SimpleUnparsedAtomHandler atomFilter.addHandler( SimpleUnparsedAtomHandler(anomalyEventHandlers), stopWhenHandledFlag=True) from aminer.analysis import NewMatchPathDetector newMatchPathDetector = NewMatchPathDetector( analysisContext.aminerConfig, anomalyEventHandlers, autoIncludeFlag=True) analysisContext.registerComponent(newMatchPathDetector, componentName=None) atomFilter.addHandler(newMatchPathDetector) # Include the e-mail notification handler only if the configuration # parameter was set. from aminer.events import DefaultMailNotificationEventHandler if DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_TARGET_ADDRESS in analysisContext.aminerConfig.configProperties: mailNotificationHandler = DefaultMailNotificationEventHandler( analysisContext.aminerConfig) analysisContext.registerComponent( mailNotificationHandler, componentName=None) anomalyEventHandlers.append(mailNotificationHandler) # Add stdout stream printing for debugging, tuning. 
# from aminer.events import StreamPrinterEventHandler # anomalyEventHandlers.append(StreamPrinterEventHandler(analysisContext.aminerConfig)) deb-build/root/etc/aminer/conf-available/0000755000000000000000000000000013352673207017267 5ustar rootrootdeb-build/root/etc/aminer/conf-available/generic/0000755000000000000000000000000013352673345020706 5ustar rootrootdeb-build/root/etc/aminer/conf-available/generic/EximParsingModel.py0000644000000000000000000000643513351130674024467 0ustar rootroot"""This module defines a parser model for exim""" from aminer.parsing import AnyByteDataModelElement from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing import DelimitedDataModelElement from aminer.parsing import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import FixedWordlistDataModelElement from aminer.parsing import IpAddressDataModelElement from aminer.parsing import OptionalMatchModelElement from aminer.parsing import SequenceModelElement from aminer.parsing import WhiteSpaceLimitedDataModelElement def getModel(userNameModel=None): """This function defines how to parse a su session information message after any standard logging preamble, e.g. from syslog.""" typeChildren = [] typeChildren.append(SequenceModelElement('queue', [ FixedWordlistDataModelElement('type', [b'Start', b'End']), FixedDataModelElement('s0', b' queue run: pid='), DecimalIntegerValueModelElement('pid')])) typeChildren.append(SequenceModelElement('rec-log', [ WhiteSpaceLimitedDataModelElement('id'), FixedDataModelElement('s0', b' <= '), WhiteSpaceLimitedDataModelElement('env-from'), FirstMatchModelElement('source', [ SequenceModelElement('network', [ FixedDataModelElement('s0', b' H=('), DelimitedDataModelElement('hostname', b') '), FixedDataModelElement('s1', b') ['), IpAddressDataModelElement('hostip'), FixedDataModelElement('s2', b']')]), SequenceModelElement('user', [ FixedDataModelElement('s0', b' U='), WhiteSpaceLimitedDataModelElement('user')]) ]), FixedDataModelElement('s2', b' P='), WhiteSpaceLimitedDataModelElement('proto'), FixedDataModelElement('s3', b' S='), DecimalIntegerValueModelElement('size'), OptionalMatchModelElement('idopt', SequenceModelElement('iddata', [ FixedDataModelElement('s0', b' id='), AnyByteDataModelElement('id')])) ])) typeChildren.append(SequenceModelElement('send-log', [ WhiteSpaceLimitedDataModelElement('id'), # Strange: first address seems to use different separator than # second one. 
FixedWordlistDataModelElement('s0', [b' => b', b' ->']), DelimitedDataModelElement('env-to', b' R='), FixedDataModelElement('s1', b' R='), WhiteSpaceLimitedDataModelElement('route'), FixedDataModelElement('s2', b' T='), WhiteSpaceLimitedDataModelElement('transport'), AnyByteDataModelElement('unparsed') ])) typeChildren.append(SequenceModelElement('sent', [ WhiteSpaceLimitedDataModelElement('id'), FixedDataModelElement('s0', b' Completed')])) typeChildren.append(SequenceModelElement('started', [ FixedDataModelElement('s0', b' exim '), WhiteSpaceLimitedDataModelElement('version'), FixedDataModelElement('s1', b' daemon started: pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s2', b', -q30m, listening for SMTP on [127.0.0.1]:25') ])) model = SequenceModelElement('exim', [ FixedDataModelElement('sname', b'exim['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msg', typeChildren)]) return model deb-build/root/etc/aminer/conf-available/generic/AudispdParsingModel.py0000644000000000000000000005701713352424175025163 0ustar rootroot"""This module contains functions and classes to create the parsing model.""" from aminer.parsing import AnyByteDataModelElement from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing import DelimitedDataModelElement from aminer.parsing import ElementValueBranchModelElement from aminer.parsing import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import FixedWordlistDataModelElement from aminer.parsing import HexStringModelElement from aminer.parsing import IpAddressDataModelElement from aminer.parsing import MatchElement from aminer.parsing import OptionalMatchModelElement from aminer.parsing import RepeatedElementDataModelElement from aminer.parsing import SequenceModelElement from aminer.parsing import VariableByteDataModelElement from aminer.parsing import WhiteSpaceLimitedDataModelElement def getModel(): """This function defines how to parse a audispd message logged via syslog after any standard logging preamble, e.g. from syslog.""" class ExecArgumentDataModelElement(object): """This is a helper class for parsing the (encoded) exec argument strings found within audit logs.""" def __init__(self, elementId): self.elementId = elementId def getChildElements(self): """Get the children of this element (none).""" return None def getMatchElement(self, path, matchContext): """Find the maximum number of bytes belonging to an exec argument. 
@return a match when at least two bytes were found including the delimiters.""" data = matchContext.matchData matchLen = 0 matchValue = b'' if data[0] == ord(b'"'): matchLen = data.find(b'"', 1) if matchLen == -1: return None matchValue = data[1:matchLen] matchLen += 1 elif data.startswith(b'(null)'): matchLen = 6 matchValue = None else: # Must be upper case hex encoded: nextValue = -1 for dByte in data: if (dByte >= 0x30) and (dByte <= 0x39): dByte -= 0x30 elif (dByte >= 0x41) and (dByte <= 0x46): dByte -= 0x37 else: break if nextValue == -1: nextValue = (dByte<<4) else: matchValue += bytearray(((nextValue|dByte),)) nextValue = -1 matchLen += 1 if nextValue != -1: return None matchData = data[:matchLen] matchContext.update(matchData) return MatchElement( "%s/%s" % (path, self.elementId), matchData, matchValue, None) pamStatusWordList = FixedWordlistDataModelElement( 'status', [b'failed', b'success']) typeBranches = {} typeBranches['ADD_USER'] = SequenceModelElement('adduser', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s2', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s3', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s4', b' msg=\'op=adding user id='), DecimalIntegerValueModelElement('newuserid'), FixedDataModelElement('s5', b' exe="'), DelimitedDataModelElement('exec', b'"'), FixedDataModelElement('s6', b'" hostname='), DelimitedDataModelElement('clientname', b' '), FixedDataModelElement('s7', b' addr='), DelimitedDataModelElement('clientip', b' '), FixedDataModelElement('s8', b' terminal='), WhiteSpaceLimitedDataModelElement('terminal'), FixedDataModelElement('s9', b' res=success\'') ]) typeBranches['BPRM_FCAPS'] = SequenceModelElement('bprmfcaps', [ FixedDataModelElement('s0', b' fver=0 fp='), HexStringModelElement('fp'), FixedDataModelElement('s1', b' fi='), HexStringModelElement('fi'), FixedDataModelElement('s2', b' fe='), HexStringModelElement('fe'), FixedDataModelElement('s3', b' old_pp='), HexStringModelElement('pp-old'), FixedDataModelElement('s4', b' old_pi='), HexStringModelElement('pi-old'), FixedDataModelElement('s5', b' old_pe='), HexStringModelElement('pe-old'), FixedDataModelElement('s6', b' new_pp='), HexStringModelElement('pp-new'), FixedDataModelElement('s7', b' new_pi='), HexStringModelElement('pi-new'), FixedDataModelElement('s8', b' new_pe='), HexStringModelElement('pe-new') ]) typeBranches['CONFIG_CHANGE'] = SequenceModelElement('conf-change', [ FixedDataModelElement('s0', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s1', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s2', b' op="add rule" key=(null) list='), DecimalIntegerValueModelElement('list'), FixedDataModelElement('s3', b' res='), DecimalIntegerValueModelElement('result') ]) typeBranches['CRED_ACQ'] = SequenceModelElement('credacq', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s2', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s3', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s4', b' msg=\'op=PAM:setcred acct="'), DelimitedDataModelElement('username', b'"'), FixedDataModelElement('s5', b'" exe="'), DelimitedDataModelElement('exec', b'"'), 
FixedDataModelElement('s6', b'" hostname='), DelimitedDataModelElement('clientname', b' '), FixedDataModelElement('s7', b' addr='), DelimitedDataModelElement('clientip', b' '), FixedDataModelElement('s8', b' terminal='), WhiteSpaceLimitedDataModelElement('terminal'), FixedDataModelElement('s9', b' res=success\'') ]) typeBranches['CRED_DISP'] = SequenceModelElement('creddisp', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s2', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s3', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s4', b' msg=\'op=PAM:setcred acct="'), DelimitedDataModelElement('username', b'"'), FixedDataModelElement('s5', b'" exe="'), DelimitedDataModelElement('exec', b'"'), FixedDataModelElement('s6', b'" hostname='), DelimitedDataModelElement('clientname', b' '), FixedDataModelElement('s7', b' addr='), DelimitedDataModelElement('clientip', b' '), FixedDataModelElement('s8', b' terminal='), WhiteSpaceLimitedDataModelElement('terminal'), FixedDataModelElement('s9', b' res=success\'') ]) typeBranches['CRED_REFR'] = SequenceModelElement('creddisp', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s2', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s3', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s4', b' msg=\'op=PAM:setcred acct="root" ' \ b'exe="/usr/sbin/sshd" hostname='), IpAddressDataModelElement('clientname'), FixedDataModelElement('s5', b' addr='), IpAddressDataModelElement('clientip'), FixedDataModelElement('s6', b' terminal=ssh res=success\'')]) typeBranches['CWD'] = SequenceModelElement('cwd', [ FixedDataModelElement('s0', b' cwd='), ExecArgumentDataModelElement('cwd')]) # We need a type branch here also, but there is no additional # data in EOE records after Ubuntu Trusty any more. typeBranches['EOE'] = OptionalMatchModelElement( 'eoe', FixedDataModelElement('s0', b'')) execArgModel = SequenceModelElement('execarg', [ FixedDataModelElement('s0', b' a'), DecimalIntegerValueModelElement('argn'), FixedDataModelElement('s1', b'='), ExecArgumentDataModelElement('argval')]) typeBranches['EXECVE'] = SequenceModelElement('execve', [ FixedDataModelElement('s0', b' argc='), DecimalIntegerValueModelElement('argc'), RepeatedElementDataModelElement('arg', execArgModel)]) typeBranches['FD_PAIR'] = SequenceModelElement('fdpair', [ FixedDataModelElement('s0', b' fd0='), DecimalIntegerValueModelElement('fd0'), FixedDataModelElement('s1', b' fd1='), DecimalIntegerValueModelElement('fd1')]) # This message differs on Ubuntu 32/64 bit variants. 
typeBranches['LOGIN'] = SequenceModelElement('login', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedWordlistDataModelElement('s2', [b' old auid=', b' old-auid=']), DecimalIntegerValueModelElement('auid-old'), FixedWordlistDataModelElement('s3', [b' new auid=', b' auid=']), DecimalIntegerValueModelElement('auid-new'), FixedWordlistDataModelElement('s4', [b' old ses=', b' old-ses=']), DecimalIntegerValueModelElement('ses-old'), FixedWordlistDataModelElement('s5', [b' new ses=', b' ses=']), DecimalIntegerValueModelElement('ses-new'), FixedDataModelElement('s6', b' res='), DecimalIntegerValueModelElement('result')]) inodeInfoModelElement = SequenceModelElement('inodeinfo', [ FixedDataModelElement('s0', b' inode='), DecimalIntegerValueModelElement('inode'), FixedDataModelElement('s1', b' dev='), # A special major/minor device element could be better here. VariableByteDataModelElement('dev', b'0123456789abcdef:'), FixedDataModelElement('s2', b' mode='), # FIXME: is octal DecimalIntegerValueModelElement('mode'), FixedDataModelElement('s3', b' ouid='), DecimalIntegerValueModelElement('ouid'), FixedDataModelElement('s4', b' ogid='), DecimalIntegerValueModelElement('ogid'), FixedDataModelElement('s5', b' rdev='), # A special major/minor device element could be better here (see above). VariableByteDataModelElement('rdev', b'0123456789abcdef:'), FixedDataModelElement('s6', b' nametype=')]) typeBranches['NETFILTER_CFG'] = SequenceModelElement('conf-change', [ FixedDataModelElement('s0', b' table='), FixedWordlistDataModelElement('table', [b'filter', b'mangle', b'nat']), FixedDataModelElement('s1', b' family='), DecimalIntegerValueModelElement('family'), FixedDataModelElement('s2', b' entries='), DecimalIntegerValueModelElement('entries') ]) typeBranches['OBJ_PID'] = SequenceModelElement('objpid', [ FixedDataModelElement('s0', b' opid='), DecimalIntegerValueModelElement('opid'), FixedDataModelElement('s1', b' oauid='), DecimalIntegerValueModelElement( 'oauid', valueSignType=DecimalIntegerValueModelElement.SIGN_TYPE_OPTIONAL), FixedDataModelElement('s2', b' ouid='), DecimalIntegerValueModelElement('ouid'), FixedDataModelElement('s3', b' oses='), DecimalIntegerValueModelElement( 'oses', valueSignType=DecimalIntegerValueModelElement.SIGN_TYPE_OPTIONAL), FixedDataModelElement('s4', b' ocomm='), ExecArgumentDataModelElement('ocomm'), ]) typeBranches['PATH'] = SequenceModelElement('path', [ FixedDataModelElement('s0', b' item='), DecimalIntegerValueModelElement('item'), FixedDataModelElement('s1', b' name='), ExecArgumentDataModelElement('name'), FirstMatchModelElement('fsinfo', [ inodeInfoModelElement, FixedDataModelElement('noinfo', b' nametype=')]), FixedWordlistDataModelElement( 'nametype', [b'CREATE', b'DELETE', b'NORMAL', b'PARENT', b'UNKNOWN']), ]) typeBranches['PROCTITLE'] = SequenceModelElement('proctitle', [ FixedDataModelElement('s1', b' proctitle='), ExecArgumentDataModelElement('proctitle')]) typeBranches['SERVICE_START'] = SequenceModelElement('service', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s2', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s3', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s4', b' msg=\'unit='), DelimitedDataModelElement('unit', b' '), 
FixedDataModelElement('s5', b' comm="systemd" exe="'), DelimitedDataModelElement('exec', b'"'), FixedDataModelElement('s6', b'" hostname='), DelimitedDataModelElement('clientname', b' '), FixedDataModelElement('s7', b' addr='), DelimitedDataModelElement('clientip', b' '), FixedDataModelElement('s8', b' terminal='), WhiteSpaceLimitedDataModelElement('terminal'), FixedDataModelElement('s9', b' res='), pamStatusWordList, FixedDataModelElement('s10', b'\'') ]) typeBranches['SERVICE_STOP'] = typeBranches['SERVICE_START'] typeBranches['SOCKADDR'] = SequenceModelElement('sockaddr', [ FixedDataModelElement('s0', b' saddr='), HexStringModelElement('sockaddr', upperCase=True), ]) typeBranches['SYSCALL'] = SequenceModelElement('syscall', [ FixedDataModelElement('s0', b' arch='), HexStringModelElement('arch'), FixedDataModelElement('s1', b' syscall='), DecimalIntegerValueModelElement('syscall'), OptionalMatchModelElement('personality', SequenceModelElement('pseq', [ FixedDataModelElement('s0', b' per='), DecimalIntegerValueModelElement('personality'), ])), OptionalMatchModelElement('result', SequenceModelElement('rseq', [ FixedDataModelElement('s2', b' success='), FixedWordlistDataModelElement('succes', [b'no', b'yes']), FixedDataModelElement('s3', b' exit='), DecimalIntegerValueModelElement( 'exit', valueSignType=DecimalIntegerValueModelElement.SIGN_TYPE_OPTIONAL), ])), FixedDataModelElement('s4', b' a0='), HexStringModelElement('arg0'), FixedDataModelElement('s5', b' a1='), HexStringModelElement('arg1'), FixedDataModelElement('s6', b' a2='), HexStringModelElement('arg2'), FixedDataModelElement('s7', b' a3='), HexStringModelElement('arg3'), FixedDataModelElement('s8', b' items='), DecimalIntegerValueModelElement('items'), FixedDataModelElement('s9', b' ppid='), DecimalIntegerValueModelElement('ppid'), FixedDataModelElement('s10', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s11', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s12', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s13', b' gid='), DecimalIntegerValueModelElement('gid'), FixedDataModelElement('s14', b' euid='), DecimalIntegerValueModelElement('euid'), FixedDataModelElement('s15', b' suid='), DecimalIntegerValueModelElement('suid'), FixedDataModelElement('s16', b' fsuid='), DecimalIntegerValueModelElement('fsuid'), FixedDataModelElement('s17', b' egid='), DecimalIntegerValueModelElement('egid'), FixedDataModelElement('s18', b' sgid='), DecimalIntegerValueModelElement('sgid'), FixedDataModelElement('s19', b' fsgid='), DecimalIntegerValueModelElement('fsgid'), FixedDataModelElement('s20', b' tty='), DelimitedDataModelElement('tty', b' '), FixedDataModelElement('s21', b' ses='), DecimalIntegerValueModelElement('sesid'), FixedDataModelElement('s22', b' comm='), ExecArgumentDataModelElement('command'), FixedDataModelElement('s23', b' exe="'), DelimitedDataModelElement('executable', b'"'), FixedDataModelElement('s24', b'" key='), AnyByteDataModelElement('key') ]) # The UNKNOWN type is used then audispd does not know the type # of the event, usually because the kernel is more recent than # audispd, thus emiting yet unknown event types. 
# * type=1327: procitle: see https://www.redhat.com/archives/linux-audit/2014-February/msg00047.html typeBranches['UNKNOWN[1327]'] = SequenceModelElement('unknown-proctitle', [ FixedDataModelElement('s0', b' proctitle='), ExecArgumentDataModelElement('proctitle') ]) typeBranches['USER_ACCT'] = SequenceModelElement('useracct', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s2', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s3', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s4', b' msg=\'op=PAM:accounting acct="'), DelimitedDataModelElement('username', b'"'), FixedDataModelElement('s5', b'" exe="'), DelimitedDataModelElement('exec', b'"'), FixedDataModelElement('s6', b'" hostname='), DelimitedDataModelElement('clientname', b' '), FixedDataModelElement('s7', b' addr='), DelimitedDataModelElement('clientip', b' '), FixedDataModelElement('s8', b' terminal='), WhiteSpaceLimitedDataModelElement('terminal'), FixedDataModelElement('s9', b' res=success\'') ]) typeBranches['USER_AUTH'] = SequenceModelElement('userauth', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s2', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s3', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s4', b' msg=\'op=PAM:authentication acct="'), DelimitedDataModelElement('username', b'"'), FixedDataModelElement('s5', b'" exe="'), DelimitedDataModelElement('exec', b'"'), FixedDataModelElement('s6', b'" hostname='), DelimitedDataModelElement('clientname', b' '), FixedDataModelElement('s7', b' addr='), DelimitedDataModelElement('clientip', b' '), FixedDataModelElement('s8', b' terminal='), WhiteSpaceLimitedDataModelElement('terminal'), FixedDataModelElement('s9', b' res=success\'') ]) typeBranches['USER_START'] = SequenceModelElement('userstart', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s2', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s3', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s4', b' msg=\'op=PAM:session_open acct="'), DelimitedDataModelElement('username', b'"'), FixedDataModelElement('s5', b'" exe="'), DelimitedDataModelElement('exec', b'"'), FixedDataModelElement('s6', b'" hostname='), DelimitedDataModelElement('clientname', b' '), FixedDataModelElement('s7', b' addr='), DelimitedDataModelElement('clientip', b' '), FixedDataModelElement('s8', b' terminal='), WhiteSpaceLimitedDataModelElement('terminal'), FixedDataModelElement('s9', b' res=success\'') ]) typeBranches['USER_END'] = SequenceModelElement('userend', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s2', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s3', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s4', b' msg=\'op=PAM:session_close acct="'), DelimitedDataModelElement('username', b'"'), FixedDataModelElement('s5', b'" exe="'), DelimitedDataModelElement('exec', b'"'), FixedDataModelElement('s6', b'" 
hostname='), DelimitedDataModelElement('clientname', b' '), FixedDataModelElement('s7', b' addr='), DelimitedDataModelElement('clientip', b' '), FixedDataModelElement('s8', b' terminal='), WhiteSpaceLimitedDataModelElement('terminal'), FixedDataModelElement('s9', b' res=success\'') ]) typeBranches['USER_ERR'] = SequenceModelElement('usererr', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s2', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s3', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s4', b' msg=\'op=PAM:bad_ident acct="?" exe="'), DelimitedDataModelElement('exec', b'"'), FixedDataModelElement('s5', b'" hostname='), DelimitedDataModelElement('clientname', b' '), FixedDataModelElement('s6', b' addr='), DelimitedDataModelElement('clientip', b' '), FixedDataModelElement('s7', b' terminal='), WhiteSpaceLimitedDataModelElement('terminal'), FixedDataModelElement('s8', b' res=failed\'') ]) typeBranches['USER_LOGIN'] = SequenceModelElement('userlogin', [ FixedDataModelElement('s0', b' pid='), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s1', b' uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s2', b' auid='), DecimalIntegerValueModelElement('auid'), FixedDataModelElement('s3', b' ses='), DecimalIntegerValueModelElement('ses'), FixedDataModelElement('s4', b' msg=\'op=login '), FirstMatchModelElement('msgtype', [ FixedDataModelElement('loginok', b'id=0'), SequenceModelElement('loginfail', [ FixedDataModelElement('s0', b'acct='), ExecArgumentDataModelElement('account') ])]), FixedDataModelElement('s5', b' exe="'), DelimitedDataModelElement('exec', b'"'), FixedDataModelElement('s6', b'" hostname='), DelimitedDataModelElement('clientname', b' '), FixedDataModelElement('s7', b' addr='), DelimitedDataModelElement('clientip', b' '), FixedDataModelElement('s8', b' terminal='), WhiteSpaceLimitedDataModelElement('terminal'), FixedDataModelElement('s9', b' res='), pamStatusWordList, FixedDataModelElement('s10', b'\'') ]) model = SequenceModelElement('audispd', [ FixedDataModelElement('sname', b'audispd: '), FirstMatchModelElement('msg', [ ElementValueBranchModelElement( 'record', SequenceModelElement('preamble', [ FixedDataModelElement('s0', b'type='), WhiteSpaceLimitedDataModelElement('type'), FixedDataModelElement('s1', b' msg=audit('), DecimalIntegerValueModelElement('time'), FixedDataModelElement('s0', b'.'), DecimalIntegerValueModelElement('ms'), FixedDataModelElement('s1', b':'), DecimalIntegerValueModelElement('seq'), FixedDataModelElement('s2', b'):') ]), 'type', typeBranches, defaultBranch=None), FixedDataModelElement('queue-full', b'queue is full - dropping event') ]) ]) return model deb-build/root/etc/aminer/conf-available/generic/SsmtpParsingModel.py0000644000000000000000000000225013351132161024653 0ustar rootroot"""This module defines a parser for ssmtp.""" from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing import DelimitedDataModelElement from aminer.parsing import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import SequenceModelElement def getModel(): """This function returns the model.""" typeChildren = [] typeChildren.append(SequenceModelElement('sent', [ FixedDataModelElement('s0', b'Sent mail for '), DelimitedDataModelElement('to-addr', b' ('), FixedDataModelElement('s1', b' ('), 
DelimitedDataModelElement('status', b') uid='), FixedDataModelElement('s2', b') uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s3', b' username='), DelimitedDataModelElement('username', b' outbytes='), FixedDataModelElement('s4', b' outbytes='), DecimalIntegerValueModelElement('bytes'), ])) model = SequenceModelElement('ssmtp', [ FixedDataModelElement('sname', b'sSMTP['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msg', typeChildren)]) return model deb-build/root/etc/aminer/conf-available/generic/NtpParsingModel.py0000644000000000000000000000544113351131053024312 0ustar rootroot"""This module defines the parsing model for ntpd logs.""" from aminer.parsing import DecimalFloatValueModelElement from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing import DelimitedDataModelElement from aminer.parsing import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import IpAddressDataModelElement from aminer.parsing import SequenceModelElement from aminer.parsing import VariableByteDataModelElement def getModel(): """Get the model.""" interfaceNameModel = VariableByteDataModelElement( 'interface', b'0123456789abcdefghijklmnopqrstuvwxyz.') typeChildren = [] typeChildren.append(SequenceModelElement('exit', [ FixedDataModelElement('s0', b'ntpd exiting on signal '), DecimalIntegerValueModelElement('signal') ])) typeChildren.append(SequenceModelElement('listen-drop', [ FixedDataModelElement('s0', b'Listen and drop on '), DecimalIntegerValueModelElement('fd'), FixedDataModelElement('s1', b' '), interfaceNameModel, FixedDataModelElement('s2', b' '), FirstMatchModelElement('address', [ IpAddressDataModelElement('ipv4'), DelimitedDataModelElement('ipv6', b' '), ]), FixedDataModelElement('s3', b' UDP 123') ])) typeChildren.append(SequenceModelElement('listen-normal', [ FixedDataModelElement('s0', b'Listen normally on '), DecimalIntegerValueModelElement('fd'), FixedDataModelElement('s1', b' '), interfaceNameModel, FixedDataModelElement('s2', b' '), IpAddressDataModelElement('ip'), FirstMatchModelElement('msg', [ FixedDataModelElement('port-new', b':123'), FixedDataModelElement('port-old', b' UDP 123') ]) ])) typeChildren.append(SequenceModelElement('listen-routing', [ FixedDataModelElement('s0', b'Listening on routing socket on fd #'), DecimalIntegerValueModelElement('fd'), FixedDataModelElement('s1', b' for interface updates') ])) typeChildren.append(FixedDataModelElement( 'new-interfaces', b'new interface(s) found: waking up resolver')) typeChildren.append(FixedDataModelElement( 'ntp-io', b'ntp_io: estimated max descriptors: 1024, initial socket boundary: 16')) typeChildren.append(FixedDataModelElement( 'peers-refreshed', b'peers refreshed')) typeChildren.append(SequenceModelElement('precision', [ FixedDataModelElement('s0', b'proto: precision = '), DecimalFloatValueModelElement('precision'), FixedDataModelElement('s1', b' usec') ])) model = SequenceModelElement('ntpd', [ FixedDataModelElement('sname', b'ntpd['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msg', typeChildren)]) return model deb-build/root/etc/aminer/conf-available/generic/UlogdParsingModel.py0000644000000000000000000000576313351133407024637 0ustar rootroot"""This module defines the parser for ulogd messages.""" from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing import DelimitedDataModelElement from aminer.parsing 
import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import FixedWordlistDataModelElement from aminer.parsing import IpAddressDataModelElement from aminer.parsing import OptionalMatchModelElement from aminer.parsing import SequenceModelElement def getModel(): """This function defines how to parse a su session information message after any standard logging preamble, e.g. from syslog.""" typeChildren = [] typeChildren.append(SequenceModelElement('build-stack', [ FixedDataModelElement('s0', b'building new pluginstance stack: \''), DelimitedDataModelElement('stack', b'\''), FixedDataModelElement('s1', b'\'') ])) # Netflow entry typeChildren.append(SequenceModelElement('nfct-event', [ FixedDataModelElement('s0', b'[DESTROY] ORIG: SRC='), IpAddressDataModelElement('osrcip'), FixedDataModelElement('s1', b' DST='), IpAddressDataModelElement('odstip'), FixedDataModelElement('s2', b' PROTO='), FixedWordlistDataModelElement('proto', [b'TCP', b'UDP']), FixedDataModelElement('s3', b' SPT='), DecimalIntegerValueModelElement('ospt'), FixedDataModelElement('s4', b' DPT='), DecimalIntegerValueModelElement('odpt'), FixedDataModelElement('s5', b' PKTS='), DecimalIntegerValueModelElement('opkts'), FixedDataModelElement('s6', b' BYTES='), DecimalIntegerValueModelElement('obytes'), FixedDataModelElement('s7', b' , REPLY: SRC='), IpAddressDataModelElement('rsrcip'), FixedDataModelElement('s8', b' DST='), IpAddressDataModelElement('rdstip'), FixedDataModelElement('s9', b' PROTO='), FixedWordlistDataModelElement('rproto', [b'TCP', b'UDP']), FixedDataModelElement('s10', b' SPT='), DecimalIntegerValueModelElement('rspt'), FixedDataModelElement('s11', b' DPT='), DecimalIntegerValueModelElement('rdpt'), FixedDataModelElement('s12', b' PKTS='), DecimalIntegerValueModelElement('rpkts'), FixedDataModelElement('s13', b' BYTES='), DecimalIntegerValueModelElement('rbytes'), # No additional whitespace from Ubuntu Trusty 14.04 on. OptionalMatchModelElement('tail', FixedDataModelElement('s0', b' ')), ])) typeChildren.append(FixedDataModelElement('nfct-plugin', b'NFCT plugin working in event mode')) typeChildren.append(FixedDataModelElement('reopen', b'reopening capture file')) typeChildren.append(FixedDataModelElement('signal', b'signal received, calling pluginstances')) typeChildren.append(FixedDataModelElement('uidchange', b'Changing UID / GID')) model = SequenceModelElement('ulogd', [ FixedDataModelElement('sname', b'ulogd['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msg', typeChildren)]) return model deb-build/root/etc/aminer/conf-available/generic/SshdParsingModel.py0000644000000000000000000002142713351131306024455 0ustar rootroot"""This module provides support for parsing of sshd messages.""" from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing import DelimitedDataModelElement from aminer.parsing import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import FixedWordlistDataModelElement from aminer.parsing import IpAddressDataModelElement from aminer.parsing import OptionalMatchModelElement from aminer.parsing import SequenceModelElement from aminer.parsing import VariableByteDataModelElement def getModel(userNameModel=None): """This function defines how to parse a sshd information message after any standard logging preamble, e.g. 
from syslog.""" if userNameModel is None: userNameModel = VariableByteDataModelElement('user', b'0123456789abcdefghijklmnopqrstuvwxyz.-') typeChildren = [] typeChildren.append(SequenceModelElement('accepted key', [ FixedDataModelElement('s0', b'Accepted publickey for '), userNameModel, FixedDataModelElement('s1', b' from '), IpAddressDataModelElement('clientip'), FixedDataModelElement('s2', b' port '), DecimalIntegerValueModelElement('port'), FixedDataModelElement('s3', b' ssh2: RSA '), VariableByteDataModelElement( 'fingerprint', b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/:') ])) typeChildren.append(SequenceModelElement('btmp-perm', [ FixedDataModelElement('s0', b'Excess permission or bad ownership on file /var/log/btmp') ])) typeChildren.append(SequenceModelElement('close-sess', [ FixedDataModelElement('s0', b'Close session: user '), userNameModel, FixedDataModelElement('s1', b' from '), IpAddressDataModelElement('clientip'), FixedDataModelElement('s2', b' port '), DecimalIntegerValueModelElement('port'), FixedDataModelElement('s3', b' id '), DecimalIntegerValueModelElement('userid') ])) typeChildren.append(SequenceModelElement('closing', [ FixedDataModelElement('s0', b'Closing connection to '), IpAddressDataModelElement('clientip'), FixedDataModelElement('s1', b' port '), DecimalIntegerValueModelElement('port')])) typeChildren.append(SequenceModelElement('closed', [ FixedDataModelElement('s0', b'Connection closed by '), IpAddressDataModelElement('clientip')])) typeChildren.append(SequenceModelElement('connect', [ FixedDataModelElement('s0', b'Connection from '), IpAddressDataModelElement('clientip'), FixedDataModelElement('s1', b' port '), DecimalIntegerValueModelElement('port'), FixedDataModelElement('s2', b' on '), IpAddressDataModelElement('serverip'), FixedDataModelElement('s3', b' port '), DecimalIntegerValueModelElement('sport') ])) typeChildren.append(SequenceModelElement('disconnectreq', [ FixedDataModelElement('s0', b'Received disconnect from '), IpAddressDataModelElement('clientip'), FixedDataModelElement('s1', b' port '), DecimalIntegerValueModelElement('port'), FixedDataModelElement('s2', b':'), DecimalIntegerValueModelElement('session'), FixedDataModelElement('s3', b': '), FixedWordlistDataModelElement('reason', [b'disconnected by user']) ])) typeChildren.append(SequenceModelElement('disconnected', [ FixedDataModelElement('s0', b'Disconnected from '), IpAddressDataModelElement('clientip'), FixedDataModelElement('s1', b' port '), DecimalIntegerValueModelElement('port') ])) typeChildren.append(SequenceModelElement('error-bind', [ FixedDataModelElement('s0', b'error: bind: Cannot assign requested address')])) typeChildren.append(SequenceModelElement('error-channel-setup', [ FixedDataModelElement('s0', b'error: channel_setup_fwd_listener: cannot listen to port: '), DecimalIntegerValueModelElement('port')])) typeChildren.append(SequenceModelElement('ident-missing', [ FixedDataModelElement('s0', b'Did not receive identification string from '), IpAddressDataModelElement('clientip') ])) typeChildren.append(SequenceModelElement('invalid-user', [ FixedDataModelElement('s0', b'Invalid user '), DelimitedDataModelElement('user', b' from '), FixedDataModelElement('s1', b' from '), IpAddressDataModelElement('clientip') ])) typeChildren.append(SequenceModelElement('invalid-user-auth-req', [ FixedDataModelElement('s0', b'input_userauth_request: invalid user '), DelimitedDataModelElement('user', b' [preauth]'), FixedDataModelElement('s1', b' [preauth]') ])) 
typeChildren.append(SequenceModelElement('postppk', [ FixedDataModelElement('s0', b'Postponed publickey for '), userNameModel, FixedDataModelElement('s1', b' from '), IpAddressDataModelElement('clientip'), FixedDataModelElement('s2', b' port '), DecimalIntegerValueModelElement('port'), FixedDataModelElement('s3', b' ssh2 [preauth]')])) typeChildren.append(SequenceModelElement('readerr', [ FixedDataModelElement('s0', b'Read error from remote host '), IpAddressDataModelElement('clientip'), FixedDataModelElement('s1', b': Connection timed out'), ])) typeChildren.append(SequenceModelElement('disconnect', [ FixedDataModelElement('s0', b'Received disconnect from '), IpAddressDataModelElement('clientip'), FixedDataModelElement('s1', b': 11: '), FirstMatchModelElement('reason', [ FixedDataModelElement('disconnected', b'disconnected by user'), SequenceModelElement('remotemsg', [ DelimitedDataModelElement('msg', b' [preauth]'), FixedDataModelElement('s0', b' [preauth]') ]), ]), ])) typeChildren.append(SequenceModelElement('signal', [ FixedDataModelElement('s0', b'Received signal '), DecimalIntegerValueModelElement('signal'), FixedDataModelElement('s1', b'; terminating.'), ])) typeChildren.append(SequenceModelElement('server', [ FixedDataModelElement('s0', b'Server listening on '), DelimitedDataModelElement('serverip', b' '), FixedDataModelElement('s1', b' port '), DecimalIntegerValueModelElement('port'), FixedDataModelElement('s2', b'.'), ])) typeChildren.append(SequenceModelElement('oom-adjust', [ FixedDataModelElement('s0', b'Set /proc/self/oom_score_adj '), OptionalMatchModelElement('from', FixedDataModelElement('default', b'from 0 ')), FixedDataModelElement('s1', b'to '), DecimalIntegerValueModelElement( 'newval', valueSignType=DecimalIntegerValueModelElement.SIGN_TYPE_OPTIONAL) ])) typeChildren.append(SequenceModelElement('session-start', [ FixedDataModelElement('s0', b'Starting session: '), FirstMatchModelElement('sess-info', [ SequenceModelElement('shell', [ FixedDataModelElement('s0', b'shell on '), DelimitedDataModelElement('terminal', b' '), ]), SequenceModelElement('subsystem', [ FixedDataModelElement('s0', b'subsystem \'sftp\''), ]), SequenceModelElement('forced-command', [ FixedDataModelElement('s0', b'forced-command (key-option) \''), DelimitedDataModelElement('command', b'\' for '), FixedDataModelElement('s1', b'\''), ]) ]), FixedDataModelElement('s1', b' for '), userNameModel, FixedDataModelElement('s2', b' from '), IpAddressDataModelElement('clientip'), FixedDataModelElement('s3', b' port '), DecimalIntegerValueModelElement('port'), OptionalMatchModelElement('idinfo', SequenceModelElement('idinfo', [ FixedDataModelElement('s0', b' id '), DecimalIntegerValueModelElement('id') ])) ])) typeChildren.append(SequenceModelElement('transferred', [ FixedDataModelElement('s0', b'Transferred: sent '), DecimalIntegerValueModelElement('sent'), FixedDataModelElement('s1', b', received '), DecimalIntegerValueModelElement('received'), FixedDataModelElement('s1', b' bytes')])) typeChildren.append(SequenceModelElement('pam', [ FixedDataModelElement('s0', b'pam_unix(sshd:session): session '), FixedWordlistDataModelElement('change', [b'opened', b'closed']), FixedDataModelElement('s1', b' for user '), userNameModel, OptionalMatchModelElement('openby', FixedDataModelElement('default', b' by (uid=0)')), ])) typeChildren.append(SequenceModelElement('child', [ FixedDataModelElement('s0', b'User child is on pid '), DecimalIntegerValueModelElement('pid')])) model = SequenceModelElement('sshd', [ 
FixedDataModelElement('sname', b'sshd['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msg', typeChildren)]) return model deb-build/root/etc/aminer/conf-available/generic/RsyslogParsingModel.py0000644000000000000000000000301513351131106025205 0ustar rootroot"""This module defines a parser for rsyslog""" from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing import DelimitedDataModelElement from aminer.parsing import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import SequenceModelElement def getModel(userNameModel=None): """This function defines how to parse a su session information message after any standard logging preamble, e.g. from syslog.""" typeChildren = [] typeChildren.append(SequenceModelElement('gidchange', [ FixedDataModelElement('s0', b'rsyslogd\'s groupid changed to '), DecimalIntegerValueModelElement('gid') ])) typeChildren.append(SequenceModelElement('statechange', [ FixedDataModelElement('s0', b'[origin software="rsyslogd" swVersion="'), DelimitedDataModelElement('version', b'"'), FixedDataModelElement('s1', b'" x-pid="'), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s2', b'" x-info="http://www.rsyslog.com"] '), FirstMatchModelElement('type', [ FixedDataModelElement('HUPed', b'rsyslogd was HUPed'), FixedDataModelElement('start', b'start') ]) ])) typeChildren.append(SequenceModelElement('uidchange', [ FixedDataModelElement('s0', b'rsyslogd\'s userid changed to '), DecimalIntegerValueModelElement('uid') ])) model = SequenceModelElement('rsyslog', [ FixedDataModelElement('sname', b'rsyslogd: '), FirstMatchModelElement('msg', typeChildren)]) return model deb-build/root/etc/aminer/conf-available/generic/KernelMsgParsingModel.py0000644000000000000000000000267013351130753025447 0ustar rootroot"""This module defines a parser for kernelmsg.""" from aminer.parsing import AnyByteDataModelElement from aminer.parsing import DelimitedDataModelElement from aminer.parsing import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import FixedWordlistDataModelElement from aminer.parsing import IpAddressDataModelElement from aminer.parsing import SequenceModelElement def getModel(): """This function defines how to parse messages from kernel logging.""" typeChildren = [] typeChildren.append(SequenceModelElement('ipv4-martian', [ FixedDataModelElement('s0', b'IPv4: martian '), FixedWordlistDataModelElement('direction', [b'source', b'destination']), FixedDataModelElement('s1', b' '), IpAddressDataModelElement('destination'), FixedDataModelElement('s2', b' from '), IpAddressDataModelElement('source'), FixedDataModelElement('s3', b', on dev '), AnyByteDataModelElement('interface') ])) typeChildren.append(SequenceModelElement('net-llheader', [ FixedDataModelElement('s0', b'll header: '), AnyByteDataModelElement('data')])) typeChildren.append(AnyByteDataModelElement('unparsed')) model = SequenceModelElement('kernel', [ FixedDataModelElement('sname', b'kernel: ['), DelimitedDataModelElement('timestamp', b']'), FixedDataModelElement('s0', b'] '), FirstMatchModelElement('msg', typeChildren)]) return model deb-build/root/etc/aminer/conf-available/generic/AMinerParsingModel.py0000644000000000000000000000257713352404607024744 0ustar rootroot"""This module defines a parser for the aminer.""" from aminer.parsing import AnyByteDataModelElement from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing 
import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import SequenceModelElement def getModel(): """This method returns the model.""" typeChildren = [] typeChildren.append(FixedDataModelElement('warn-no-openat', \ b'WARNING: SECURITY: No secure open yet due to missing openat in python!')) typeChildren.append(FixedDataModelElement('warn-no-OPATH', \ b'WARNING: SECURITY: Open should use O_PATH, but not yet available in python')) typeChildren.append(FixedDataModelElement('warn-POSIX-acls', \ b'WARNING: SECURITY: No checking for backdoor access via \ POSIX ACLs, use "getfacl" from "acl" package to check manually.')) typeChildren.append(FixedDataModelElement('warn-no-linkat', \ b'WARNING: SECURITY: unsafe unlink (unavailable unlinkat/linkat \ should be used, but not available in python)')) typeChildren.append(AnyByteDataModelElement('unparsed')) model = SequenceModelElement('aminer', [ FixedDataModelElement('sname', b'AMiner['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msg', typeChildren)]) return model deb-build/root/etc/aminer/conf-available/generic/TomcatParsingModel.py0000644000000000000000000000170113351133341024775 0ustar rootroot""" This module defines a parser for tomcat""" from aminer.parsing import AnyByteDataModelElement from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import SequenceModelElement def getModel(): """This method returns the model.""" typeChildren = [] typeChildren.append(FixedDataModelElement('start', b' * Starting Tomcat servlet engine tomcat7')) typeChildren.append(FixedDataModelElement('stop', b' * Stopping Tomcat servlet engine tomcat7')) typeChildren.append(FixedDataModelElement('done', b' ...done.')) typeChildren.append(AnyByteDataModelElement('unparsed')) model = SequenceModelElement('tomcat7', [ FixedDataModelElement('sname', b'tomcat7['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msg', typeChildren)]) return model deb-build/root/etc/aminer/conf-available/generic/SyslogPreambleModel.py0000644000000000000000000000213313352423015025153 0ustar rootroot"""This module defines a parser for syslog.""" from aminer.parsing import DateTimeModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import SequenceModelElement from aminer.parsing import VariableByteDataModelElement def getModel(timeModel=None): """This function defines the model for parsing a standard syslog preamble including timestamp and hostname. @param timeModel when not none, the given model element is used for parsing timestamps. Otherwise a standard DateTimeModelElement with format b'%b %d %H:%M:%S' is created. CAVEAT: the standard model may not work when log data timestamp locale does not match host or shell environment locale. See MultiLocaleDateTimeModelElement instead. 
""" if timeModel is None: timeModel = DateTimeModelElement('time', b'%b %d %H:%M:%S') hostNameModel = VariableByteDataModelElement('host', b'-.01234567890abcdefghijklmnopqrstuvwxyz') model = SequenceModelElement('syslog', [ timeModel, FixedDataModelElement('sp0', b' '), hostNameModel, FixedDataModelElement('sp1', b' ')]) return model deb-build/root/etc/aminer/conf-available/generic/SystemdParsingModel.py0000644000000000000000000000733313351133311025202 0ustar rootroot"""This module contains functions and classes to create the parsing model.""" from aminer.parsing import DecimalFloatValueModelElement from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing import DelimitedDataModelElement from aminer.parsing import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import OptionalMatchModelElement from aminer.parsing import SequenceModelElement from aminer.parsing import VariableByteDataModelElement def getSystemdModel(): """This function defines the parsing model for messages directly from systemd.""" typeChildren = [] typeChildren.append(FixedDataModelElement( 'apt-daily-start', b'Starting Daily apt activities...')) typeChildren.append(FixedDataModelElement( 'apt-daily-started', b'Started Daily apt activities.')) typeChildren.append(SequenceModelElement('apt-daily-timer', [ FixedDataModelElement('s0', b'apt-daily.timer: Adding '), OptionalMatchModelElement('hopt', SequenceModelElement('hblock', [ DecimalIntegerValueModelElement('hours'), FixedDataModelElement('s1', b'h '), ])), DecimalIntegerValueModelElement('minutes'), FixedDataModelElement('s2', b'min '), DecimalFloatValueModelElement('seconds'), FixedDataModelElement('s3', b's random time.')])) typeChildren.append(FixedDataModelElement( 'tmp-file-cleanup', b'Starting Cleanup of Temporary Directories...')) typeChildren.append(FixedDataModelElement( 'tmp-file-cleanup-started', b'Started Cleanup of Temporary Directories.')) model = SequenceModelElement('systemd', [ FixedDataModelElement('sname', b'systemd['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msg', typeChildren)]) return model def getLogindModel(userNameModel=None): """This function defines how to parse a systemd logind daemon message after any standard logging preamble, e.g. from syslog.""" if userNameModel is None: userNameModel = VariableByteDataModelElement( 'user', b'0123456789abcdefghijklmnopqrstuvwxyz-') typeChildren = [] # FIXME: Will fail on username models including the dot at the end. typeChildren.append(SequenceModelElement('new session', [ FixedDataModelElement('s0', b'New session '), DecimalIntegerValueModelElement('session'), FixedDataModelElement('s1', b' of user '), userNameModel, FixedDataModelElement('s2', b'.')])) typeChildren.append(SequenceModelElement('removed session', [ FixedDataModelElement('s0', b'Removed session '), DecimalIntegerValueModelElement('session'), FixedDataModelElement('s1', b'.')])) model = SequenceModelElement('systemd-logind', [ FixedDataModelElement('sname', b'systemd-logind['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msg', typeChildren)]) return model def getTmpfilesModel(): """This function defines how to parse a systemd tmpfiles daemon message after any standard logging preamble, e.g. from syslog.""" typeChildren = [] # FIXME: Will fail on username models including the dot at the end. 
typeChildren.append(SequenceModelElement('duplicate', [ FixedDataModelElement('s0', b'[/usr/lib/tmpfiles.d/var.conf:14] Duplicate line for path "'), DelimitedDataModelElement('path', b'", ignoring.'), FixedDataModelElement('s2', b'", ignoring.')])) model = SequenceModelElement('systemd-tmpfiles', [ FixedDataModelElement('sname', b'systemd-tmpfiles['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msg', typeChildren)]) return model deb-build/root/etc/aminer/conf-available/generic/CronParsingModel.py0000644000000000000000000000414313351130542024452 0ustar rootroot"""This module defines a parser for cron.""" from aminer.parsing import AnyByteDataModelElement from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing import DelimitedDataModelElement from aminer.parsing import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import FixedWordlistDataModelElement from aminer.parsing import OptionalMatchModelElement from aminer.parsing import SequenceModelElement from aminer.parsing import VariableByteDataModelElement def getModel(userNameModel=None): """This function defines how to parse a cron message logged via syslog after any standard logging preamble, e.g. from syslog.""" if userNameModel is None: userNameModel = VariableByteDataModelElement('user', b'0123456789abcdefghijklmnopqrstuvwxyz.-') typeChildren = [] typeChildren.append(SequenceModelElement('exec', [ FixedDataModelElement('s0', b'('), userNameModel, FixedDataModelElement('s1', b') CMD '), AnyByteDataModelElement('command') ])) typeChildren.append(SequenceModelElement('pam', [ FixedDataModelElement('s0', b'pam_unix(cron:session): session '), FixedWordlistDataModelElement('change', [b'opened', b'closed']), FixedDataModelElement('s1', b' for user '), userNameModel, OptionalMatchModelElement('openby', FixedDataModelElement('default', b' by (uid=0)')), ])) model = FirstMatchModelElement('cron', [ SequenceModelElement('std', [ FixedDataModelElement('sname', b'CRON['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msgtype', typeChildren) ]), SequenceModelElement('low', [ FixedDataModelElement('sname', b'cron['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: (*system*'), DelimitedDataModelElement('rname', b') RELOAD ('), FixedDataModelElement('s1', b') RELOAD ('), DelimitedDataModelElement('fname', b')'), FixedDataModelElement('s2', b')'), ]), ]) return model deb-build/root/etc/aminer/conf-available/generic/SuSessionParsingModel.py0000644000000000000000000000416013351132263025505 0ustar rootroot"""This module defines a parser for susession.""" from aminer.parsing import DecimalIntegerValueModelElement from aminer.parsing import DelimitedDataModelElement from aminer.parsing import FirstMatchModelElement from aminer.parsing import FixedDataModelElement from aminer.parsing import FixedWordlistDataModelElement from aminer.parsing import OptionalMatchModelElement from aminer.parsing import SequenceModelElement from aminer.parsing import VariableByteDataModelElement def getModel(userNameModel=None): """This function defines how to parse a su session information message after any standard logging preamble, e.g. 
from syslog.""" if userNameModel is None: userNameModel = VariableByteDataModelElement('user', b'0123456789abcdefghijklmnopqrstuvwxyz.-') srcUserNameModel = VariableByteDataModelElement('srcuser', \ b'0123456789abcdefghijklmnopqrstuvwxyz.-') typeChildren = [] typeChildren.append(SequenceModelElement('su-good', [ FixedDataModelElement('s0', b'Successful su for '), userNameModel, FixedDataModelElement('s1', b' by '), srcUserNameModel])) typeChildren.append(SequenceModelElement('su-good', [ FixedDataModelElement('s0', b'+ '), DelimitedDataModelElement('terminal', b' '), FixedDataModelElement('s1', b' '), srcUserNameModel, FixedDataModelElement('s2', b':'), userNameModel])) typeChildren.append(SequenceModelElement('pam', [ FixedDataModelElement('s0', b'pam_unix(su:session): session '), FixedWordlistDataModelElement('change', [b'opened', b'closed']), FixedDataModelElement('s1', b' for user '), userNameModel, OptionalMatchModelElement('openby', \ SequenceModelElement('userinfo', [ FixedDataModelElement('s0', b' by (uid='), DecimalIntegerValueModelElement('uid'), FixedDataModelElement('s1', b')')])) ])) model = SequenceModelElement('su', [ FixedDataModelElement('sname', b'su['), DecimalIntegerValueModelElement('pid'), FixedDataModelElement('s0', b']: '), FirstMatchModelElement('msg', typeChildren)]) return model deb-build/root/etc/aminer/conf-enabled/0000755000000000000000000000000013352673345016744 5ustar rootrootdeb-build/root/etc/aminer/conf-enabled/Readme.txt0000644000000000000000000000071213326562314020674 0ustar rootrootThis directory contains files enabled to be included in the analysis pipeline configuration. The files are made available by including this directory within the site packages. If you have objections enabling all the python site packages stored on this host within a process running with elevated privileges, you can also include only some site package components by placing symlinks here, e.g. ln -s /usr/lib/python2.7/dist-packages/pytz conf-enabled/pytz deb-build/root/etc/init/0000755000000000000000000000000013352673345014117 5ustar rootrootdeb-build/root/etc/init/aminer.conf0000644000000000000000000000103713326562314016234 0ustar rootroot# Simple upstart script to run default aminer instance as daemon. # To configure edit "/etc/aminer/config.py". description "aminer startup script" # Enable AMiner autostart only when you have tested your configuration # and know, what you are doing. See /usr/share/doc/aminer/Readme.txt # section "Running as a Service". # Start immediately when filesystem is available. # DISABLED: start on filesystem stop on runlevel [06] # AMiner will fork twice when in background mode. expect daemon respawn exec /usr/lib/logdata-anomaly-miner/AMiner deb-build/debian/0000755000000000000000000000000013361140013012615 5ustar rootrootdeb-build/debian/AMiner.1.xml0000644000000000000000000001677513354671510014706 0ustar rootroot .
will be generated. You may view the manual page with: nroff -man .
| less'. A typical entry in a Makefile or Makefile.am is: DB2MAN = /usr/share/sgml/docbook/stylesheet/xsl/docbook-xsl/manpages/docbook.xsl XP = xsltproc -''-nonet -''-param man.charmap.use.subset "0" manpage.1: manpage.xml $(XP) $(DB2MAN) $< The xsltproc binary is found in the xsltproc package. The XSL files are in docbook-xsl. A description of the parameters you can use can be found in the docbook-xsl-doc-* packages. Please remember that if you create the nroff version in one of the debian/rules file targets (such as build), you will need to include xsltproc and docbook-xsl in your Build-Depends control field. Alternatively use the xmlto command/package. That will also automatically pull in xsltproc and docbook-xsl. Notes for using docbook2x: docbook2x-man does not automatically create the AUTHOR(S) and COPYRIGHT sections. In this case, please add them manually as ... . To disable the automatic creation of the AUTHOR(S) and COPYRIGHT sections read /usr/share/doc/docbook-xsl/doc/manpages/authors.html. This file can be found in the docbook-xsl-doc-html package. Validation can be done using: `xmllint -''-noout -''-valid manpage.xml` General documentation about man-pages and man-page-formatting: man(1), man(7), http://www.tldp.org/HOWTO/Man-Page/ --> ]> &dhtitle; &dhpackage; &dhfirstname; &dhsurname; Wrote this manpage for the Debian system.
&dhemail;
2016 &dhusername; This manual page was written for the Debian system (and may be used by others). Permission is granted to copy, distribute and/or modify this document under the terms of the GNU General Public License, Version 3. On Debian systems, the complete text of the GNU General Public License can be found in /usr/share/common-licenses/GPL.
AMINER &dhsection; AMiner lightweight tool for log checking, log analysis AMiner DESCRIPTION This manual page documents briefly the AMiner command. For more details see packaged documentation at /usr/share/doc/logdata-anomaly-miner. OPTIONS with long options starting with two dashes (`-'). A summary of options is included below. For a complete description, see the info 1 files. Specify the configuration file, otherwise /etc/aminer/config.py is used. See /etc/aminer/config.py.template for configuration file template, /usr/share/doc/logdata-anomaly-miner/demo for examples. With this parameter, AMiner will not detach from the terminal and daemonize. When not in foreground mode, AMiner will also change the working directory to /, hence relative path in configuration file will not work. INTERNAL PARAMETER - DO NOT USE. It is just documented here for completeness. FILES /etc/aminer/config.py The main configuration file for the AMiner daemon. See /etc/aminer/config.py.template for configuration file template, /usr/share/doc/logdata-anomaly-miner/demo for examples. BUGS Report bugs via your distribution's bug tracking system. For bugs in the the software trunk, report via at . At startup, AMiner will quite likely print out some security warnings to increase transparency. They are here just to remind you of the limitations the current implementation. They should be the same as for nearly all other programs on your platform, just that others do not tell you. See the source code documentation for a short explanation, why a given part of the implementation is not that secure as it could be when leveraging the security features a platform could provide you. SEE ALSO AMinerRemoteControl1
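The FILES section above points to /etc/aminer/config.py.template and /usr/share/doc/logdata-anomaly-miner/demo for authoritative configuration examples. Purely as orientation, the fragment below sketches how the pieces named in the NEWS entries further down (configProperties, buildAnalysisPipeline, SimpleByteStreamLineAtomizerFactory, AtomFilters.SubhandlerFilter, SimpleUnparsedAtomHandler) fit together; the log path, the trivial parsing model and the empty handler list are placeholders only, and the constructor signatures should be verified against the installed version rather than taken from this sketch.

# Minimal orientation sketch of /etc/aminer/config.py -- placeholders only,
# see config.py.template and the demo directory for real configurations.
configProperties = {}
configProperties['LogResourceList'] = ['file:///var/log/auth.log']

def buildAnalysisPipeline(analysisContext):
  """Called by AMiner with the shared analysis context to build the pipeline."""
  from aminer.parsing import AnyByteDataModelElement
  from aminer.input import SimpleByteStreamLineAtomizerFactory
  from aminer.input import SimpleUnparsedAtomHandler
  from aminer.analysis import AtomFilters

  # Placeholder model accepting any line; a real setup would combine the
  # models from /etc/aminer/conf-available/generic instead.
  parsingModel = AnyByteDataModelElement('unparsed')
  anomalyEventHandlers = []

  atomFilter = AtomFilters.SubhandlerFilter(None)
  atomFilter.addHandler(
      SimpleUnparsedAtomHandler(anomalyEventHandlers), stopWhenHandledFlag=True)
  # Argument order follows the NEWS excerpt below; check the installed API.
  analysisContext.atomizerFactory = SimpleByteStreamLineAtomizerFactory(
      parsingModel, [atomFilter], anomalyEventHandlers)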
deb-build/debian/preinst0000755000000000000000000000332013326562314014242 0ustar rootroot#!/bin/sh # preinst script for logdata-anomaly-miner # # see: dh_installdeb(1) set -e # summary of how this script can be called: # * `install' # * `install' # * `upgrade' # * `abort-upgrade' # for details, see https://www.debian.org/doc/debian-policy/ or # the debian-policy package case "$1" in install) # Create the user to run the analysis service. analysisGroup="aminer" if [ "$(getent group "${analysisGroup}")" = "" ]; then # Add a separate group for aitmon. # The group does not need to be a system group, but low gid is # preferable to avoid mixing with user groups. Using '--system' # flag would cause gid allocation to go down from UID_MIN, not # up from SYS_GID_MIN, so avoid using --system. groupadd -K GID_MIN=100 -K GID_MAX=1000 "${analysisGroup}" fi analysisUser="aminer" if [ "$(getent passwd "${analysisUser}")" = "" ]; then # Add a system user, set home directory to nonexisting directory # to avoid loading of user-defined files. Create user without # using '--system' flag, thus allocating UIDs upwards. useradd -M --shell /usr/sbin/nologin --gid "${analysisGroup}" -K PASS_MAX_DAYS=-1 -K UID_MIN=100 -K UID_MAX=999 --home /nonexistent "${analysisUser}" # There is no way to make useradd ommit assignment of subuids, # so remove them immediately on affected systems. if test -e /etc/subuid; then usermod --del-subuids 1-4294967295 --del-subgids 1-4294967295 "${analysisUser}" fi fi ;; esac # dh_installdeb will replace this with shell code automatically # generated by other debhelper scripts. #DEBHELPER# exit 0 deb-build/debian/NEWS0000644000000000000000000001744113361114573013337 0ustar rootrootlogdata-anomaly-miner (1.0.0-1) unstable; urgency=low * Ported code to Python 3 * Code cleanup using pylint * Added util/JsonUtil.py to encode byte strings for storing them as json objects * Added docs/development-procedures.txt which documents development procedures * New MissingMatchPathListValueDetector to detect stream interuption * Added parsing support for kernel IP layer martian package messages * Systemd parsing of apt invocation messages. Bugfixes: * AnalysisChild: handle remote control client connection errors correctly * Various bugfixes -- Markus Wurzenberger Tue, 2 Oct 2018 17:00:00 +0000 logdata-anomaly-miner (0.0.8-1) unstable; urgency=low Apart from bugfixes, new parsing and analysis components were added: * Base64StringModelElement * DecimalFloatValueModelElement * StringRegexMatchRule * EnhancedNewMatchPathValueComboDetector -- Roman Fiedler Tue, 30 May 2017 17:00:00 +0000 logdata-anomaly-miner (0.0.7-1) unstable; urgency=low The datetime parsing DateTimeModelElement was reimplemented to fix various shortcomings of strptime in Python and libc. This will require changes in configuration due to API changes, e.g.: -timeModel=DateTimeModelElement('time', '%b %d %H:%M:%S', 15, False) +timeModel=DateTimeModelElement('time', '%b %d %H:%M:%S') See /usr/lib/logdata-anomaly-miner/aminer/parsing/DateTimeModelElement.py source code documentation for currently supported datetime format options. The code for reading log input was improved to allow also input from UNIX sockets. Thus the configuration was changed to support those modes: -configProperties['LogFileList']=['/var/log/auth.log', ... +configProperties['LogResourceList'] = ['file:///var/log/auth.log', ... 
-- Roman Fiedler Mon, 9 Jan 2017 18:00:00 +0000 logdata-anomaly-miner (0.0.6-1) unstable; urgency=low The input IO-handling was redesigned, thus introducing following API changes. The changes are flaged with (D)eveloper and (U)ser to indicate if only developers of own AMiner addons are affected or also users may need to migrate their configuration. * Upper layers receive LogAtom objects instead of log lines, parsing data as separate parameters. Thus also separate paths for forwarding of parsed and unparsed atoms are not required any more. See below for details (D, U): * Update any own UnparsedAtomHandler/ParsedAtomHandlerInterface implementations to use new interface "input.AtomHandlerInterface" and access to additional information to new methods and fields (D): -from aminer.parsing import ParsedAtomHandlerInterface +from aminer.input import AtomHandlerInterface -class YourHandler(ParsedAtomHandlerInterface, ... +class YourHandler(AtomHandlerInterface, - def receiveParsedAtom(self, atomData, parserMatch): + def receiveAtom(self, logAtom): - timestamp=parserMatch.getDefaultTimestamp() + timestamp=logAtom.getTimestamp() + parserMatch=logAtom.parserMatch - print '%s' % atomData + print '%s' % logAtom.rawData * With parsed/unparsed atom processing path convergence, naming of other classes does not make sense any more (U): -from aminer.analysis import VolatileLogarithmicBackoffParsedAtomHistory +from aminer.util import VolatileLogarithmicBackoffAtomHistory - from aminer.analysis import ParsedAtomFilters + from aminer.analysis import AtomFilters - matchAction=Rules.ParsedAtomFilterMatchAction(... + matchAction=Rules.AtomFilterMatchAction(... - parsedAtomHandlers=[] - unparsedAtomHandlers=[] - analysisContext.atomizerFactory=SimpleByteStreamLineAtomizerFactory( - parsingModel, parsedAtomHandlers, unparsedAtomHandlers, ... + atomFilter=AtomFilters.SubhandlerFilter(None) + analysisContext.atomizerFactory=SimpleByteStreamLineAtomizerFactory( + parsingModel, [atomFilter], ... For handling of unparsed atoms: - unparsedAtomHandlers.append(SimpleUnparsedAtomHandler(anomalyEventHandlers)) + atomFilter.addHandler(SimpleUnparsedAtomHandler(anomalyEventHandlers), + stopWhenHandledFlag=True) For handling of parsed atoms: - parsedAtomHandlers.append(... + atomFilter.addHandler(... -- Roman Fiedler Fri, 4 Nov 2016 18:00:00 +0000 logdata-anomaly-miner (0.0.5-1) unstable; urgency=low Following API changes were introduced: * Lower input layers dealing with binary data stream reading, splitting into atoms and forwarding data to the parsing model were redesigned. Following configuration changes are required to adapt "config.py" and probably "analysis.py" to the new API: * analysisContext.registerComponent(): registerAsRawAtomHandler parameter not needed any more, can be removed. * SimpleParsingModelRawAtomHandler is not needed any more, that part can be replaced by configuration: # Now define the AtomizerFactory using the model. A simple line # based one is usually sufficient. from aminer.input import SimpleByteStreamLineAtomizerFactory analysisContext.atomizerFactory=SimpleByteStreamLineAtomizerFactory( parsingModel, parsedAtomHandlers, unparsedAtomHandlers, anomalyEventHandlers, defaultTimestampPath='/model/syslog/time') * SimpleUnparsedAtomHandler was moved from "aminer.events" to "aminer.input". 
-- Roman Fiedler Mon, 11 Oct 2016 18:00:00 +0000 logdata-anomaly-miner (0.0.4-1) unstable; urgency=low Following API changes were introduced: * Event handling (general): Change of EventHandlerInterface to include also eventSource as last parameter. See /usr/lib/logdata-anomaly-miner/aminer/events/__init__.py * VolatileLogarithmicBackoffEventHistory: Added event ID and source to stored tuple to allow unique identification of events. Split result of "getHistory()" to include "eventId, eventType, eventMessage, sortedLogLines, eventData, eventSource". -- Roman Fiedler Fri, 26 Aug 2016 15:15:00 +0000 logdata-anomaly-miner (0.0.3-1) unstable; urgency=low Following API changes were introduced: * To improve readability of configuration files, main parser, analysis and event classes were added to the submodule namespaces. After imports directly from the submodule, e.g. "from aminer.parsing import FixedDataModelElement", the name duplication "FixedDataModelElement.FixedDataModelElement" is not required any more, "FixedDataModelElement" is sufficient. Use "sed -i -e 's/Name.Name/Name/g' [files]" to adapt. * Component timing was restructured to allow forensic/realtime triggering. Therefore also clean interface was added, which is now also used to reduce redundant code in component registration. Old way: analysisContext.registerComponent(newMatchPathDetector, componentName=None, registerAsRawAtomHandler=False, registerAsTimeTriggeredHandler=True) New way: analysisContext.registerComponent(newMatchPathDetector, registerAsRawAtomHandler=False) For own custom time-triggered components, make sure to implement the "aminer.util.TimeTriggeredComponentInterface". Use any standard component, e.g. "/usr/lib/logdata-anomaly-miner/aminer/analysis/NewMatchPathDetector.py" as example. * Introduction of "AnalysisContext" to have common handle for all data required to perform the analysis. Therefore also the signature of "buildAnalysisPipeline" in "config.py/analysis.py" has changed from def buildAnalysisPipeline(aminerConfig): to def buildAnalysisPipeline(analysisContext): Old references to "aminerConfig" within the configuration script have to be replaced by "analysisContext.aminerConfig". -- Roman Fiedler Thu, 21 Jul 2016 19:00:00 +0000 deb-build/debian/logdata-anomaly-miner.manpages0000644000000000000000000000005513326562314020530 0ustar rootrootdebian/AMiner.1 debian/AMinerRemoteControl.1 deb-build/debian/changelog0000644000000000000000000000326113361126416014504 0ustar rootrootlogdata-anomaly-miner (1.0.0-1) unstable; urgency=low New upstream release V1.0.0, see https://launchpad.net/logdata-anomaly-miner/+milestone/v1.0.0 -- Markus Wurzenberger Tue, 2 Oct 2018 17:00:00 +0000 logdata-anomaly-miner (0.0.8-1) unstable; urgency=low New upstream release V0.0.8, see https://launchpad.net/logdata-anomaly-miner/+milestone/v0.0.8 -- Roman Fiedler Tue, 30 May 2017 17:00:00 +0000 logdata-anomaly-miner (0.0.7-1) unstable; urgency=low New upstream release V0.0.7, see https://launchpad.net/logdata-anomaly-miner/+milestone/v0.0.7 -- Roman Fiedler Mon, 9 Jan 2017 18:00:00 +0000 logdata-anomaly-miner (0.0.6-1) unstable; urgency=low New upstream release V0.0.6, see https://launchpad.net/logdata-anomaly-miner/+milestone/v0.0.6 -- Roman Fiedler Fri, 4 Nov 2016 18:00:00 +0000 logdata-anomaly-miner (0.0.5-1) unstable; urgency=low * New upstream release (Closes: #840447). 
-- Roman Fiedler Tue, 11 Oct 2016 18:00:00 +0000 logdata-anomaly-miner (0.0.3-2) unstable; urgency=low * Packaging fix: unowned directory after purge (Closes: #832347). -- Roman Fiedler Tue, 2 Aug 2016 15:15:00 +0000 logdata-anomaly-miner (0.0.3-1) unstable; urgency=low * New upstream release (Closes: #832058). -- Roman Fiedler Thu, 21 Jul 2016 19:00:00 +0000 logdata-anomaly-miner (0.0.2-1) unstable; urgency=low * Initial inclusion of logdata-anomaly-miner to Debian (Closes: #813096) -- Roman Fiedler Thu, 9 Jun 2016 12:00:00 +0000 deb-build/debian/AMinerRemoteControl.1.xml0000644000000000000000000002363213354671562017420 0ustar rootroot .
will be generated. You may view the manual page with: nroff -man .
| less'. A typical entry in a Makefile or Makefile.am is: DB2MAN = /usr/share/sgml/docbook/stylesheet/xsl/docbook-xsl/manpages/docbook.xsl XP = xsltproc -''-nonet -''-param man.charmap.use.subset "0" manpage.1: manpage.xml $(XP) $(DB2MAN) $< The xsltproc binary is found in the xsltproc package. The XSL files are in docbook-xsl. A description of the parameters you can use can be found in the docbook-xsl-doc-* packages. Please remember that if you create the nroff version in one of the debian/rules file targets (such as build), you will need to include xsltproc and docbook-xsl in your Build-Depends control field. Alternatively use the xmlto command/package. That will also automatically pull in xsltproc and docbook-xsl. Notes for using docbook2x: docbook2x-man does not automatically create the AUTHOR(S) and COPYRIGHT sections. In this case, please add them manually as ... . To disable the automatic creation of the AUTHOR(S) and COPYRIGHT sections read /usr/share/doc/docbook-xsl/doc/manpages/authors.html. This file can be found in the docbook-xsl-doc-html package. Validation can be done using: `xmllint -''-noout -''-valid manpage.xml` General documentation about man-pages and man-page-formatting: man(1), man(7), http://www.tldp.org/HOWTO/Man-Page/ --> ]> &dhtitle; &dhpackage; &dhfirstname; &dhsurname; Wrote this manpage for the Debian system.
&dhemail;
2016 &dhusername; This manual page was written for the Debian system (and may be used by others). Permission is granted to copy, distribute and/or modify this document under the terms of the GNU General Public License, Version 3. On Debian systems, the complete text of the GNU General Public License can be found in /usr/share/common-licenses/GPL.
AMINERREMOTECONTROL &dhsection; AMinerRemoteControl lightweight tool for log checking, log analysis AMinerRemoteControl DESCRIPTION This manual page briefly documents the AMinerRemoteControl command. The command executes arbitrary remote control commands in a running AMiner child process. As the child process usually runs with lowered privileges or SELinux/AppArmor confinement, you may observe unexpected results when accessing resources outside the child process, e.g. files. For more details see also the packaged documentation at /usr/share/doc/logdata-anomaly-miner. Example use cases: Just a test: /usr/bin/AMinerRemoteControl --Data '["Some text", 123]' --Exec 'remoteControlResponse="It works! Data %s" % repr(remoteControlData)' Query remote module configuration: /usr/bin/AMinerRemoteControl --Exec 'remoteControlResponse=analysisContext.getRegisteredComponentIds()' OPTIONS All options are long options starting with two dashes (`-'). A summary of options is included below. For a complete description, see the packaged documentation. Specify the Unix domain remote control socket path, otherwise /var/run/aminer-remote.socket is used. The socket is opened by AMiner when the 'RemoteControlSocket' feature is enabled in the configuration. As the socket is of SOCK_STREAM type, it may also be forwarded via any other stream forwarders, e.g. socat (see UNIX-CONNECT and UNIX-LISTEN) and SSH (see LocalForward, DynamicForward). Access control is only done by file system permissions (DAC) of the socket, so make sure not to widen the access by mistake. For each --Exec option, the next argument is sent in a separate remote execution request using additional execution data (see --Data). The code is executed in a separate execution namespace with only some variables added to the local namespace, e.g. the execution data is available as 'remoteControlData'. When the executed code sets the local variable 'remoteControlResponse', the object is serialized using json and sent back in the response. For each --ExecFile option, the named file is loaded and its content submitted in the very same way as if the --Exec parameter had been used with the file content as its argument. This parameter defines a json string describing Python objects that will be sent with all subsequent --Exec operations until changed again using another --Data option. Take into account that there are size limits for the request; very large data objects may exceed those limits. The execution context will expose the data as the variable 'remoteControlData'. When set, AMinerRemoteControl will not pass the result to repr. The returned object is just converted to a plain string via str(object) and the result is printed to avoid escaping of quotation marks, newlines, .... WARNING: This might be insecure: without escaping, the printed data may contain terminal control sequences that exploit vulnerabilities or misconfiguration of your terminal to execute code with the privileges of the terminal or of the process calling AMinerRemoteControl (usually root). FILES /var/run/aminer-remote.socket This is the default remote control socket used when not changed using the --ControlSocket option. BUGS Report bugs via your distribution's bug tracking system. For bugs in the software trunk, report them via the upstream bug tracker. SEE ALSO AMiner(1)
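The OPTIONS section above explains that code passed via --Exec or --ExecFile runs in a restricted execution namespace where 'remoteControlData' is available and where an assignment to 'remoteControlResponse' is serialized as json and returned. As a small sketch building on the getRegisteredComponentIds() example above (the file name and the dictionary layout are illustrative assumptions), a command file for --ExecFile could look like this:

    # listComponents.py, submitted with:
    #   /usr/bin/AMinerRemoteControl --ExecFile listComponents.py
    # 'analysisContext' and 'remoteControlData' are provided by the remote
    # execution environment; whatever is assigned to 'remoteControlResponse'
    # is serialized as json and printed by AMinerRemoteControl.
    componentIds = analysisContext.getRegisteredComponentIds()
    remoteControlResponse = {
        'componentCount': len(componentIds),
        'componentIds': list(componentIds)}

Because the response is json-serialized, it should only contain plain lists, dictionaries, strings and numbers.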
deb-build/debian/upstream/0000755000000000000000000000000013361127351014467 5ustar rootrootdeb-build/debian/upstream/signing-key.asc0000644000000000000000000000612313361127357017413 0ustar rootroot-----BEGIN PGP PUBLIC KEY BLOCK----- mQINBFu3GpMBEADFuwv0p+h49MR4l/TnZK85WrCxs8PRtMpAHJGQ9aqVrHUzvTxc enXcd1ilk2P+93bBxa/Kee5USE/3oZESmA7C57Dfl2HBDRmJYLxpX8csQmyPRlE/ 6soM2JXLLT/yhssGuwdZ3Bdz0vELs7/i0vne19sPy3mq9EtkxInGxLtHiiObRU3R KizJ0+GABEacFKjukCpMe48z3T/Fq67FRyKBsQ9yNBTfAC1tS/8l+Z5J3UV6q0/u W9Z08ZFlippMJW1XnI2Z8c9KaN+X5QQJIS38apAyJ2Fo/TZHCuoLD3a7Eau7+BH2 Gf9g5u2CidlRyGiW/lZhFi0EgpluHp2XCPFkV+fDDCf1t6oy6ihwEMWKSxTbO96A FcUJPVg24jXBDboBW482dd3UuVdj443DY+OkEbx0Muk5pfKXuY0sLCWt5kVDafY0 o0wvbDzKSlFss6SaoH1ULimsXpDkpmk68WWyvRNbFb71ZOEYTXbqep3I8XTTOxlq TgDVipnL7LnwofXBi2Q0kpfpn5HMA1yxwYd/16Xze55En49AUT/cercI0sbzxKTh 6Mn0AVdKfy4VkFksNfeLfoYo7NeH46UPcBJZmyf4HM2ljVLcuqFBBXUIdzbsBoq4 mep/W/4H8HulJ6czGfGvnJ93CTNuHjvEQeJu4JwrD4hIJNROPuu/BtpmmwARAQAB tDNNYXJrdXMgV3VyemVuYmVyZ2VyIDxtYXJrdXMud3VyemVuYmVyZ2VyQGFpdC5h Yy5hdD6JAk4EEwEKADgWIQQg3Ug6hVyYZ/WizGHfFAu/xyvVHQUCW7cakwIbAwUL CQgHAgYVCgkICwIEFgIDAQIeAQIXgAAKCRDfFAu/xyvVHRTWEACyRlG+NWkYrkiJ xrmT6rfiUVR16LebfJWcpVXMVNrxDbNmaw7KlW56DMVak1T6DVTBqH3bMO2b5b7g Kea3n6rHR9cjo9dGx7+V4p9XNipDrODZeK0rs1BWuYPRxCFNRXJv0i88BLW/x+CV vIo6HccAgeVGn+eMVTopKHOjcDb5cN65qSf9IqdyKAZDwJAMCrAys870Ats7qrxU /uhez/CxqYKj9jaomm0UK/vmcCoLiayRFNwmWsJGPiWoV6UabjbX1Yb2jza6Woge uGthz7kJTYniEnxs47JqNDJqdL/wMi3ZFV4kkXBvmpbL74kAXa1AkrRwdhtsYrq2 UohRFyRnHR6VR7DlT6PuAZ1ZQsSfx8UNOkx90KVBnhAb5VYfpXs8Z5NsSZzz0kBd yj1YQBqatxZIm3fWWb+GoSEi/Rd2O5RuWor5nIKg58R3LngsMdoyKB3X6g9wGWHK dvA5ZVi5Yc0TEPZi7lcNdX0d7qzNOzqUlWlk9GBfzFD++qMKxqHAnRnVeuIHecSb 2X9UJbE7i0It7cWVedbYNbX8cUUk3RC+L8kknqC5giya0cHlpxUn7ceoS3RlICKT 1lHyH5JhRSYUGYdV5eOkpE15BEewP4mczT3gKJ5xlhg8Q2pjVKzQkFV36W9Zn9Sc ka4EDJkpWVu251Y3jk/1YLcVe+tXh7kCDQRbtxqTARAA7u5p3xsm0SUHuOXJmae+ qFrDoPLvWda/dDMtqXIcU1VPpH3aY35UaAsdM7yAqow5Tg2GG5I1fH4gYP2lMj84 5fGeyi9nZHzD3SAY6kBbidqyrNBfflBcgZQbU29VVLHy4onJhQvpbapsYt4O03iW SXAO5DhcYo1ZXvyL0TlhD4LYx8hU7VsFKS8+JwTeExE5rexGuQ20vJqphH+YvPVb Zo5dScpvjjg09MJEVpMBcqjHw7vVxV3ibHcd5j4Kk1+XanBO9Pc30kj2b/RTZwcx 1wAXAU1l5JhxaXu0YQRFNoSzwrZhW/jePe2wop3E6EjhqVkjcXmhOXsOu5D8sGJt 45ivo9qcqZSNa8smXmOrEHnrRo0dVRPESCTupYNftO+mMzG9GbRDM5qgyV7+rnkT SgScV+ARhf6F7JF7Or8Vjb8bPaumCbwH5/HQmi/b099juEZPn1Bi9/YClk2xNRIr mW5o6QkvMBMxkCvdNG9ov0fZ72iaUcTd5i21vwnN8oeLPzBGVq+ipFbeKBzMnPHv 8zugEYmX7kdyFanQlvuf7s4sBXIbcvSETcuFc4U2V5Ig+FHqVWK2S14t1ulpUHcY IVILZABYlqTMKf6tERt7IiYIlWf1GPZ8ISavIfK9ngUw/p3mTac7IkMOuVYS5e9e SyyN3k+GHAyKWW4dYVjjJ3kAEQEAAYkCNgQYAQoAIBYhBCDdSDqFXJhn9aLMYd8U C7/HK9UdBQJbtxqTAhsMAAoJEN8UC7/HK9UdkEwP/j991EVS3IxwaOcpDL4m+XdW k/CjetQc5DjHgQQO9WSUhBePbSD/obnjqdwX0j0zovWV0OBbT/B9jdmwXePhKq1y 3xgGNy7vwQ+Hrbdhs64qEkUfee1qOgxyUIzhBY4NdkSI4mXdxuaidXGMIUzcwZhJ yS6S/GfZpShzVq6c+G2jYahp9dOck7T2WxD9qMZhBq7d/vsWQ/IdlpQfLLrqFD6j m7+cbuS/F1lCcR90g6pca1dejoTBF0Sl5/21KzrfQoPa5gUKdtKk6egLNR3kIYKh UaeJMtO7Ps5k2kYyJFcfYf3zGrbSnUVZbukf9glg9cYjc3GQjRkltooKBon6ntGg 0mMi3NrcsbjPnKoaea65yM58KoFSETbiH3bNukwjY6OzyZlr6czKrDz7XS7SmieH 0QfQxHN8rnlgzJgBDGt0RbE0xeRfj9uILYTnDHpTOzYbNSSaOoryEYN3PREdPD8y hNPlnTzLNI5zjfwv7ZOxlukSBbBJi4bw9Sg5s4bmB0v1yXqruTecK9v0lbGd5/+F ZeWT1K3rHRnwp4kKqvluB4z6QtsQCFCbYq6x6E5k8B3DvgDD1aF7say8WtRxyETF QmuwBU3cvZ6HUONQ1IDqtgRZeDqdMrl8DQn78l6vvpfo7pBGu9EzkICUSDY2Kv2V 4NjOAe+zq8rISsuuEUn3 =++gV -----END PGP PUBLIC KEY BLOCK----- deb-build/debian/compat0000644000000000000000000000000313361077531014032 0ustar rootroot11 deb-build/debian/postinst0000755000000000000000000000313313326562314014443 0ustar rootroot#!/bin/sh # postinst 
script for logdata-anomaly-miner # # see: dh_installdeb(1) set -e # summary of how this script can be called: # * `configure' # * `abort-upgrade' # * `abort-remove' `in-favour' # # * `abort-remove' # * `abort-deconfigure' `in-favour' # `removing' # # for details, see https://www.debian.org/doc/debian-policy/ or # the debian-policy package case "$1" in configure) analysisUser="aminer" analysisGroup="aminer" # Prohibit read access to configuration for other processes if ! dpkg-statoverride --list /etc/aminer > /dev/null; then chown "root.${analysisGroup}" -- /etc/aminer chmod 00750 -- /etc/aminer fi if ! dpkg-statoverride --list /var/lib/aminer > /dev/null; then chmod 00700 -- /var/lib/aminer chown "${analysisUser}.${analysisGroup}" -- /var/lib/aminer fi ;; esac # We do not use pyc-files anyway, so disable generation. alias pycompile=/bin/true # dh_installdeb will replace this with shell code automatically # generated by other debhelper scripts. #DEBHELPER# # Start the service only when it was already enabled before updating. if deb-systemd-helper debian-installed aminer.service && [ -d /run/systemd/system ]; then systemctl --system daemon-reload >/dev/null || true deb-systemd-invoke start aminer.service >/dev/null || true fi exit 0 deb-build/debian/dirs0000644000000000000000000000001713326562314013514 0ustar rootrootvar/lib/aminer deb-build/debian/copyright0000644000000000000000000000214713326562314014571 0ustar rootrootFormat: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: logdata-anomaly-miner Source: git+ssh://git.launchpad.net/logdata-anomaly-miner Files: * Copyright: 2016 Roman Fiedler License: GPL-3.0+ Files: debian/* Copyright: 2016 Roman Fiedler License: GPL-3.0+ License: GPL-3.0+ This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. . This package is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. . You should have received a copy of the GNU General Public License along with this program. If not, see . . On Debian systems, the complete text of the GNU General Public License version 3 can be found in "/usr/share/common-licenses/GPL-3". deb-build/debian/watch0000644000000000000000000000024713361126264013665 0ustar rootrootversion=3 opts=pgpsigurlmangle=s/$/.sig/ https://launchpad.net/logdata-anomaly-miner/+download .*/trunk/[a-z0-9.]+/\+download/logdata-anomaly-miner-([0-9~_.]+).tar.gz deb-build/debian/rules0000755000000000000000000000136613361127642013720 0ustar rootroot#!/usr/bin/make -f # -*- makefile -*- # Uncomment this to turn on verbose mode. # export DH_VERBOSE=1 %: dh $@ --with=python3 override_dh_auto_build: xsltproc --nonet \ --param make.year.ranges 1 \ --param make.single.year.ranges 1 \ --param man.charmap.use.subset 0 \ -o debian/ \ http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl \ debian/AMiner.1.xml debian/AMinerRemoteControl.1.xml dh_auto_build # Modify startup behaviour in auto-generated code in postinst: # Do not attempt to add aminer.service to autostart if user does # not want to have it running explicitely. See "Running as a Service" # from /usr/share/doc/aminer/Readme.txt.gz for more information. 
override_dh_installsystemd: dh_installsystemd --no-enable deb-build/debian/source/0000755000000000000000000000000013361054315014126 5ustar rootrootdeb-build/debian/source/format0000644000000000000000000000001413361054315015334 0ustar rootroot3.0 (quilt) deb-build/debian/control0000644000000000000000000000272113361140013014222 0ustar rootrootSource: logdata-anomaly-miner Section: admin Priority: optional Maintainer: Markus Wurzenberger Build-Depends: debhelper (>= 11.0.0), dh-python, docbook-xsl, docbook-xml, python3-all, xsltproc Standards-Version: 4.2.1 Homepage: https://launchpad.net/logdata-anomaly-miner/ Vcs-Git: https://git.launchpad.net/logdata-anomaly-miner Vcs-Browser: https://git.launchpad.net/logdata-anomaly-miner/tree/ Package: logdata-anomaly-miner Architecture: all Depends: ${python3:Depends}, python3-tz, ${misc:Depends} Suggests: python-scipy Description: This tool allows one to create log analysis pipelines to analyze log data streams and detect violations or anomalies in them. It can be run from the console, as a daemon with e-mail alerting, or embedded as a library into your own programs. It was designed to run the analysis with limited resources and the lowest possible permissions to make it suitable for production server use. Analysis methods include: . * static check patterns similar to logcheck but with extended syntax and options. * detection of new data elements (IPs, user names, MAC addresses) * statistical anomalies in log line values and frequencies * correlation rules between log lines as described in the AECID approach http://dx.doi.org/10.1016/j.cose.2014.09.006 . The tool is suitable for replacing logcheck but also for operating as a sensor feeding a SIEM. . Please report bugs at https://bugs.launchpad.net/logdata-anomaly-miner/+filebug deb-build/debian/logdata-anomaly-miner.links0000644000000000000000000000020613326562314020053 0ustar rootroot/usr/lib/logdata-anomaly-miner/AMiner /usr/bin/AMiner /usr/lib/logdata-anomaly-miner/AMinerRemoteControl /usr/bin/AMinerRemoteControl deb-build/debian/prerm0000755000000000000000000000131413326562314013704 0ustar rootroot#!/bin/sh # prerm script for logdata-anomaly-miner # # see: dh_installdeb(1) set -e # summary of how this script can be called: # * `remove' # * `upgrade' # * `failed-upgrade' # * `remove' `in-favour' # * `deconfigure' `in-favour' # `removing' # # for details, see https://www.debian.org/doc/debian-policy/ or # the debian-policy package # dh_installdeb will replace this with shell code automatically # generated by other debhelper scripts. #DEBHELPER# exit 0 deb-build/debian/logdata-anomaly-miner.install0000644000000000000000000000001113326562314020373 0ustar rootrootroot/* .
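The debian/control description above lists detection of new data elements among the analysis methods. As a rough configuration sketch tying this to the API documented in the changelog entries above (the constructor arguments, in particular autoIncludeFlag, and the surrounding variables anomalyEventHandlers and atomFilter are assumptions based on those changelog snippets), such a detector could be wired into buildAnalysisPipeline() roughly like this:

    from aminer.analysis import NewMatchPathDetector

    # Hypothetical fragment of buildAnalysisPipeline(analysisContext) in
    # config.py: report log atoms containing parser paths never seen before.
    newMatchPathDetector = NewMatchPathDetector(
        analysisContext.aminerConfig, anomalyEventHandlers, autoIncludeFlag=True)
    # Register for time-triggered persistence; the registerAsRawAtomHandler
    # parameter is not needed any more, see the 0.0.5 changelog entry above.
    analysisContext.registerComponent(newMatchPathDetector)
    # Feed parsed atoms to the detector via the atom filter.
    atomFilter.addHandler(newMatchPathDetector)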
deb-build/debian/.debhelper/0000755000000000000000000000000013361111230014624 5ustar rootrootdeb-build/debian/.debhelper/generated/0000755000000000000000000000000013361111230016562 5ustar rootrootdeb-build/debian/.debhelper/generated/logdata-anomaly-miner/0000755000000000000000000000000013361111231022744 5ustar rootrootdeb-build/debian/.debhelper/generated/logdata-anomaly-miner/installed-by-dh_installdocs0000644000000000000000000000000013361111230030233 0ustar rootrootdeb-build/debian/.debhelper/generated/logdata-anomaly-miner/postrm.service0000644000000000000000000000026713361111231025657 0ustar rootroot# Automatically added by dh_installsystemd/11.1.6ubuntu2 if [ -d /run/systemd/system ]; then systemctl --system daemon-reload >/dev/null || true fi # End automatically added section deb-build/debian/.debhelper/generated/logdata-anomaly-miner/prerm.service0000644000000000000000000000032413361111231025452 0ustar rootroot# Automatically added by dh_installsystemd/11.1.6ubuntu2 if [ -d /run/systemd/system ] && [ "$1" = remove ]; then deb-systemd-invoke stop 'aminer.service' >/dev/null || true fi # End automatically added section deb-build/debian/.debhelper/generated/logdata-anomaly-miner/installed-by-dh_install0000644000000000000000000000004113361111230027367 0ustar rootroot./root/etc ./root/lib ./root/usr deb-build/debian/.debhelper/generated/logdata-anomaly-miner/installed-by-dh_installman0000644000000000000000000000006113361111231030066 0ustar rootroot./debian/AMiner.1 ./debian/AMinerRemoteControl.1 deb-build/debian/.debhelper/generated/logdata-anomaly-miner/postinst.service0000644000000000000000000000245313361111231026215 0ustar rootroot# Automatically added by dh_installsystemd/11.1.6ubuntu2 if [ "$1" = "configure" ] || [ "$1" = "abort-upgrade" ] || [ "$1" = "abort-deconfigure" ] || [ "$1" = "abort-remove" ] ; then # This will only remove masks created by d-s-h on package removal. deb-systemd-helper unmask 'aminer.service' >/dev/null || true # was-enabled defaults to true, so new installations run enable. if deb-systemd-helper --quiet was-enabled 'aminer.service'; then # Enables the unit on first installation, creates new # symlinks on upgrades if the unit file has changed. deb-systemd-helper enable 'aminer.service' >/dev/null || true else # Update the statefile to add new symlinks (if any), which need to be # cleaned up on purge. Also remove old symlinks. deb-systemd-helper update-state 'aminer.service' >/dev/null || true fi fi # End automatically added section # Automatically added by dh_installsystemd/11.1.6ubuntu2 if [ "$1" = "configure" ] || [ "$1" = "abort-upgrade" ] || [ "$1" = "abort-deconfigure" ] || [ "$1" = "abort-remove" ] ; then if [ -d /run/systemd/system ]; then systemctl --system daemon-reload >/dev/null || true if [ -n "$2" ]; then _dh_action=restart else _dh_action=start fi deb-systemd-invoke $_dh_action 'aminer.service' >/dev/null || true fi fi # End automatically added section deb-build/debian/postrm0000755000000000000000000000151713326562314014110 0ustar rootroot#!/bin/sh # postrm script for logdata-anomaly-miner # # see: dh_installdeb(1) set -e # summary of how this script can be called: # * `remove' # * `purge' # * `upgrade' # * `failed-upgrade' # * `abort-install' # * `abort-install' # * `abort-upgrade' # * `disappear' # # for details, see https://www.debian.org/doc/debian-policy/ or # the debian-policy package case "$1" in remove) # Delete user, will also delete group. 
userdel "aminer" ;; esac # dh_installdeb will replace this with shell code automatically # generated by other debhelper scripts. #DEBHELPER# exit 0