pyaff4-0.26.post6/0000755000175000017500000000000013552346257014212 5ustar rhertzogrhertzogpyaff4-0.26.post6/pyaff4/0000755000175000017500000000000013552346257015403 5ustar rhertzogrhertzogpyaff4-0.26.post6/pyaff4/rdfvalue.py0000664000175000017500000002254513211617552017566 0ustar rhertzogrhertzog# Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. """RDF Values are responsible for serialization.""" from __future__ import unicode_literals from future import standard_library standard_library.install_aliases() from builtins import str from builtins import object import functools import urllib.parse import urllib.request, urllib.parse, urllib.error import binascii import posixpath import rdflib from pyaff4 import registry from pyaff4 import utils # pylint: disable=protected-access class Memoize(object): def __call__(self, f): f.memo_pad = {} @functools.wraps(f) def Wrapped(self, *args): key = tuple(args) if len(f.memo_pad) > 100: f.memo_pad.clear() if key not in f.memo_pad: f.memo_pad[key] = f(self, *args) return f.memo_pad[key] return Wrapped class RDFValue(object): datatype = "" def __init__(self, initializer=None): self.Set(initializer) def GetRaptorTerm(self): return rdflib.Literal(self.SerializeToString(), datatype=self.datatype) def SerializeToString(self): """Serializes to a sequence of bytes.""" return "" def UnSerializeFromString(self, string): """Unserializes from bytes.""" raise NotImplementedError def Set(self, string): raise NotImplementedError def __bytes__(self): return self.SerializeToString() def __eq__(self, other): return utils.SmartStr(self) == utils.SmartStr(other) def __req__(self, other): return utils.SmartStr(self) == utils.SmartStr(other) def __hash__(self): return hash(self.SerializeToString()) class RDFBytes(RDFValue): value = b"" datatype = rdflib.XSD.hexBinary def SerializeToString(self): return binascii.hexlify(self.value) def UnSerializeFromString(self, string): self.Set(binascii.unhexlify(string)) def Set(self, data): self.value = data def __eq__(self, other): if isinstance(other, RDFBytes): return self.value == other.value class XSDString(RDFValue): """A unicode string.""" datatype = rdflib.XSD.string def SerializeToString(self): return utils.SmartStr(self.value) def UnSerializeFromString(self, string): self.Set(utils.SmartUnicode(string)) def Set(self, data): self.value = utils.SmartUnicode(data) def __str__(self): return self.value @functools.total_ordering class XSDInteger(RDFValue): datatype = rdflib.XSD.integer def SerializeToString(self): return utils.SmartStr(self.value) def UnSerializeFromString(self, string): self.Set(int(string)) def Set(self, data): self.value = int(data) def __eq__(self, other): if isinstance(other, XSDInteger): return self.value == other.value return self.value == other def __int__(self): return self.value def __long__(self): return int(self.value) def __cmp__(self, o): return self.value - o def __add__(self, o): return self.value + o def __lt__(self, o): return self.value < o def __str__(self): return str(self.value) class RDFHash(XSDString): # value is the hex encoded digest. def __eq__(self, other): if isinstance(other, RDFHash): if self.datatype == other.datatype: return self.value == other.value return utils.SmartStr(self.value) == utils.SmartStr(other) def __ne__(self, other): return not self == other def digest(self): return binascii.unhexlify(self.value) class SHA512Hash(RDFHash): datatype = rdflib.URIRef("http://aff4.org/Schema#SHA512") class SHA256Hash(RDFHash): datatype = rdflib.URIRef("http://aff4.org/Schema#SHA256") class SHA1Hash(RDFHash): datatype = rdflib.URIRef("http://aff4.org/Schema#SHA1") class Blake2bHash(RDFHash): datatype = rdflib.URIRef("http://aff4.org/Schema#Blake2b") class MD5Hash(RDFHash): datatype = rdflib.URIRef("http://aff4.org/Schema#MD5") class SHA512BlockMapHash(RDFHash): datatype = rdflib.URIRef("http://aff4.org/Schema#blockMapHashSHA512") class URN(RDFValue): """Represent a URN. According to RFC1738 URLs must be encoded in ASCII. Therefore the internal representation of a URN is bytes. When creating the URN from other forms (e.g. filenames, we assume UTF8 encoding if the filename is a unicode string. """ # The encoded URN as a unicode string. value = None original_filename = None @classmethod def FromFileName(cls, filename): """Parse the URN from filename. Filename may be a unicode string, in which case it will be UTF8 encoded into the URN. URNs are always ASCII. """ result = cls("file:%s" % urllib.request.pathname2url(filename)) result.original_filename = filename return result @classmethod def NewURNFromFilename(cls, filename): return cls.FromFileName(filename) def ToFilename(self): # For file: urls we exactly reverse the conversion applied in # FromFileName. if self.value.startswith("file:"): return urllib.request.url2pathname(self.value[5:]) components = self.Parse() if components.scheme == "file": return components.path def GetRaptorTerm(self): return rdflib.URIRef(self.value) def SerializeToString(self): components = self.Parse() return utils.SmartStr(urllib.parse.urlunparse(components)) def UnSerializeFromString(self, string): utils.AssertStr(string) self.Set(utils.SmartUnicode(string)) return self def Set(self, data): if data is None: return elif isinstance(data, URN): self.value = data.value else: utils.AssertUnicode(data) self.value = data def Parse(self): return self._Parse(self.value) # URL parsing seems to be slow in Python so we cache it as much as possible. @Memoize() def _Parse(self, value): components = urllib.parse.urlparse(value) # dont normalise path for http URI's if components.scheme and not components.scheme == "http": normalized_path = posixpath.normpath(components.path) if normalized_path == ".": normalized_path = "" components = components._replace(path=normalized_path) if not components.scheme: # For file:// URNs, we need to parse them from a filename. components = components._replace( netloc="", path=urllib.request.pathname2url(value), scheme="file") self.original_filename = value return components def Scheme(self): components = self.Parse() return components.scheme def Append(self, component, quote=True): components = self.Parse() if quote: component = urllib.parse.quote(component) # Work around usual posixpath.join bug. component = component.lstrip("/") new_path = posixpath.normpath(posixpath.join( "/", components.path, component)) components = components._replace(path=new_path) return URN(urllib.parse.urlunparse(components)) def RelativePath(self, urn): urn_value = str(urn) if urn_value.startswith(self.value): return urn_value[len(self.value):] def __str__(self): return self.value def __lt__(self, other): return self.value < utils.SmartUnicode(other) def __repr__(self): return "<%s>" % self.value def AssertURN(urn): if not isinstance(urn, URN): raise TypeError("Expecting a URN.") def AssertURN(urn): if not isinstance(urn, URN): raise TypeError("Expecting a URN.") registry.RDF_TYPE_MAP.update({ rdflib.XSD.hexBinary: RDFBytes, rdflib.XSD.string: XSDString, rdflib.XSD.integer: XSDInteger, rdflib.XSD.int: XSDInteger, rdflib.XSD.long: XSDInteger, rdflib.URIRef("http://aff4.org/Schema#SHA512"): SHA512Hash, rdflib.URIRef("http://aff4.org/Schema#SHA256"): SHA256Hash, rdflib.URIRef("http://aff4.org/Schema#SHA1"): SHA1Hash, rdflib.URIRef("http://aff4.org/Schema#MD5"): MD5Hash, rdflib.URIRef("http://aff4.org/Schema#Blake2b"): Blake2bHash, rdflib.URIRef("http://aff4.org/Schema#blockMapHashSHA512"): SHA512BlockMapHash, rdflib.URIRef("http://afflib.org/2009/aff4#SHA512"): SHA512Hash, rdflib.URIRef("http://afflib.org/2009/aff4#SHA256"): SHA256Hash, rdflib.URIRef("http://afflib.org/2009/aff4#SHA1"): SHA1Hash, rdflib.URIRef("http://afflib.org/2009/aff4#MD5"): MD5Hash, rdflib.URIRef("http://afflib.org/2009/aff4#blockMapHashSHA512"): SHA512BlockMapHash }) pyaff4-0.26.post6/pyaff4/aff4_file.py0000664000175000017500000001150613211617552017570 0ustar rhertzogrhertzog# Copyright 2015 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. """An implementation of AFF4 file backed objects.""" from __future__ import unicode_literals from future import standard_library standard_library.install_aliases() from builtins import str import logging import os import io from pyaff4 import aff4 from pyaff4 import aff4_utils from pyaff4 import lexicon from pyaff4 import rdfvalue from pyaff4 import registry BUFF_SIZE = 64 * 1024 LOGGER = logging.getLogger("pyaff4") class FileBackedObject(aff4.AFF4Stream): def _GetFilename(self): filename = self.resolver.Get(self.urn, lexicon.AFF4_FILE_NAME) if filename: return filename # Only file:// URNs are supported. if self.urn.Scheme() == "file": return self.urn.ToFilename() @staticmethod def _CreateIntermediateDirectories(components): """Recursively create intermediate directories.""" path = os.sep if aff4.WIN32: # On windows we do not want a leading \ (e.g. C:\windows not # \C:\Windows) path = "" for component in components: path = path + component + os.sep LOGGER.info("Creating intermediate directories %s", path) if os.isdir(path): continue # Directory does not exist - Try to make it. try: aff4_utils.MkDir(path) continue except IOError as e: LOGGER.error( "Unable to create intermediate directory: %s", e) raise def LoadFromURN(self): flags = "rb" filename = self._GetFilename() if not filename: raise IOError("Unable to find storage for %s" % self.urn) filename = str(filename) directory_components = os.sep.split(filename) directory_components.pop(-1) mode = self.resolver.Get(self.urn, lexicon.AFF4_STREAM_WRITE_MODE) if mode == "truncate": flags = "w+b" self.resolver.Set(self.urn, lexicon.AFF4_STREAM_WRITE_MODE, rdfvalue.XSDString("append")) self.properties.writable = True self._CreateIntermediateDirectories(directory_components) elif mode == "append": flags = "a+b" self.properties.writable = True self._CreateIntermediateDirectories(directory_components) LOGGER.info("Opening file %s", filename) self.fd = open(filename, flags) try: self.fd.seek(0, 2) self.size = self.fd.tell() except IOError: self.properties.sizeable = False self.properties.seekable = False def Read(self, length): if self.fd.tell() != self.readptr: self.fd.seek(self.readptr) result = self.fd.read(length) self.readptr += len(result) return result def WriteStream(self, stream, progress=None): """Copy the stream into this stream.""" while True: data = stream.read(BUFF_SIZE) if not data: break self.Write(data) progress.Report(self.readptr) def Write(self, data): self.MarkDirty() if self.fd.tell() != self.readptr: self.fd.seek(self.readptr) self.fd.write(data) # self.fd.flush() self.readptr += len(data) def Flush(self): if self.IsDirty(): self.fd.flush() super(FileBackedObject, self).Flush() def Prepare(self): self.readptr = 0 def Truncate(self): self.fd.truncate(0) def Size(self): self.fd.seek(0, 2) return self.fd.tell() def GenericFileHandler(resolver, urn): if os.path.isdir(urn.ToFilename()): directory_handler = registry.AFF4_TYPE_MAP[lexicon.AFF4_DIRECTORY_TYPE] result = directory_handler(resolver) resolver.Set(result.urn, lexicon.AFF4_STORED, urn) return result return FileBackedObject(resolver, urn) registry.AFF4_TYPE_MAP["file"] = GenericFileHandler registry.AFF4_TYPE_MAP[lexicon.AFF4_FILE_TYPE] = FileBackedObject class AFF4MemoryStream(FileBackedObject): def __init__(self, *args, **kwargs): super(AFF4MemoryStream, self).__init__(*args, **kwargs) self.fd = io.BytesIO() pyaff4-0.26.post6/pyaff4/aff4_utils.py0000664000175000017500000000440213211617552020006 0ustar rhertzogrhertzogfrom __future__ import unicode_literals from future import standard_library standard_library.install_aliases() from builtins import chr import os import re import shutil import string import urllib.parse from pyaff4 import rdfvalue from pyaff4 import utils PRINTABLES = set(string.printable) for i in "!$\\:*%?\"<>|]": PRINTABLES.discard(i) PRINTABLES_NO_SLASH = PRINTABLES.copy() PRINTABLES_NO_SLASH.discard('/') def member_name_for_urn(member_urn, base_urn=None, slash_ok=True): if slash_ok: acceptable_set = PRINTABLES else: acceptable_set = PRINTABLES_NO_SLASH filename = base_urn.RelativePath(member_urn) # The member is not related to the base URN, just concatenate them together. if filename is None: filename = os.path.join( base_urn.Parse().path, member_urn.SerializeToString()) if filename.startswith("/"): filename = filename[1:] # original implementations of AFF4 (and Evimetry) escape the leading aff4:// if filename.startswith("aff4://"): return filename.replace("aff4://", "aff4%3A%2F%2F") # Escape chars which are non printable. escaped_filename = [] for c in filename: if c in acceptable_set: escaped_filename.append(c) else: escaped_filename.append("%%%02x" % ord(c)) return "".join(escaped_filename) def urn_from_member_name(member, base_urn): """Returns a URN object from a zip file's member name.""" member = utils.SmartUnicode(member) # Remove %xx escapes. member = re.sub( "%(..)", lambda x: chr(int("0x" + x.group(1), 0)), member) # This is an absolute URN. if urllib.parse.urlparse(member).scheme == "aff4": result = member else: # Relative member becomes relative to the volume's URN. result = base_urn.Append(member, quote=False) return rdfvalue.URN(result) def MkDir(path): try: os.mkdir(path) except OSError as e: if "File exists" in e.strerror: return raise def RemoveDirectory(path): try: shutil.rmtree(path) except OSError: pass def EnsureDirectoryExists(path): dirname = os.path.dirname(path) try: os.makedirs(dirname) except OSError: pass pyaff4-0.26.post6/pyaff4/block_hasher.py0000664000175000017500000005021013211617552020370 0ustar rhertzogrhertzogfrom __future__ import division from __future__ import print_function from __future__ import absolute_import from __future__ import unicode_literals # Copyright 2016,2017 Schatz Forensic Pty Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from builtins import next from builtins import str from builtins import range from past.utils import old_div from builtins import object import binascii import collections import hashlib import six from pyaff4 import container from pyaff4 import data_store from pyaff4 import hashes from pyaff4 import lexicon from pyaff4 import rdfvalue from pyaff4 import zip class InvalidBlockHashComparison(Exception): pass class InvalidHashComparison(Exception): pass class InconsistentHashMethod(Exception): pass # the following is for ordering hashes when calculating hashOrderingMap = { lexicon.HASH_MD5 : 1, lexicon.HASH_SHA1: 2, lexicon.HASH_SHA256 : 3, lexicon.HASH_SHA512 : 4, lexicon.HASH_BLAKE2B: 5} class ValidationListener(object): def __init__(self): pass def onValidBlockHash(self, a): pass def onInvalidBlockHash(self, a, b, imageStreamURI, offset): raise InvalidBlockHashComparison( "Invalid block hash comarison for stream %s at offset %d" % (imageStreamURI, offset)) def onValidHash(self, typ, hash, imageStreamURI): print("Validation of %s %s succeeded. Hash = %s" % (imageStreamURI, typ, hash)) def onInvalidHash(self, typ, a, b, streamURI): raise InvalidHashComparison("Invalid %s comarison for stream %s" % (typ, streamURI)) class BlockHashesHash(object): def __init__(self, blockHashAlgo, hash, hashDataType): self.blockHashAlgo = blockHashAlgo self.hash = hash self.hashDataType = hashDataType def __eq__(self, other): if self.blockHashAlgo == other.blockHashAlgo and self.hash == other.hash and self.hashDataType == other.hashDataType: return True else: return False def __ne__(self, other): return not self.__eq__(other) def digest(self): return binascii.unhexlify(self.hash) class Validator(object): def __init__(self, listener=None): if listener == None: self.listener = ValidationListener() else: self.listener = listener self.delegate = None def validateContainer(self, urn): lex = container.Container.identifyURN(urn) resolver = data_store.MemoryDataStore(lex) with zip.ZipFile.NewZipFile(resolver, urn) as zip_file: if lex == lexicon.standard: self.delegate = InterimStdValidator(resolver, lex, self.listener) elif lex == lexicon.legacy: self.delegate = PreStdValidator(resolver, lex, self.listener) else: raise ValueError self.delegate.doValidateContainer() def validateContainerMultiPart(self, urn_a, urn_b): # in this simple example, we assume that both files passed are # members of the Container lex = container.Container.identifyURN(urn_a) resolver = data_store.MemoryDataStore(lex) with zip.ZipFile.NewZipFile(resolver, urn_a) as zip_filea: with zip.ZipFile.NewZipFile(resolver, urn_b) as zip_fileb: if lex == lexicon.standard: self.delegate = InterimStdValidator(resolver, lex, self.listener) elif lex == lexicon.legacy: self.delegate = PreStdValidator(resolver, lex, self.listener) else: raise ValueError self.delegate.doValidateContainer() def validateBlockMapHash(self, mapStreamURI, imageStreamURI): storedHash = next(self.resolver.QuerySubjectPredicate( mapStreamURI, self.lexicon.blockMapHash)) calculalatedHash = self.calculateBlockMapHash(mapStreamURI, imageStreamURI, storedHash.datatype) if storedHash != calculalatedHash: self.listener.onInvalidHash("BlockMapHash", storedHash, calculalatedHash, mapStreamURI) else: self.listener.onValidHash("BlockMapHash", storedHash, mapStreamURI) return calculalatedHash def findLocalImageStreamOfMap(self, mapStreamURI): mapContainer = self.resolver.findContainerOfStream(mapStreamURI) for dependentStream in self.resolver.QuerySubjectPredicate(mapStreamURI, self.lexicon.dependentStream): container = self.resolver.findContainerOfStream(dependentStream) if container == mapContainer: return dependentStream raise Exception def calculateBlockMapHash(self, mapStreamURI, imageStreamURI, storedHashDataType): storedBlockHashesHash = sorted( self.getStoredBlockHashes(str(imageStreamURI)), key=lambda x: hashOrderingMap[x.blockHashAlgo]) calculatedHash = hashes.new(storedHashDataType) for hash in storedBlockHashesHash: bytes = hash.digest() calculatedHash.update(bytes) for hash in self.resolver.QuerySubjectPredicate(mapStreamURI, self.lexicon.mapPointHash): calculatedHash.update(hash.digest()) for hash in self.resolver.QuerySubjectPredicate(mapStreamURI, self.lexicon.mapIdxHash): calculatedHash.update(hash.digest()) for hash in self.resolver.QuerySubjectPredicate(mapStreamURI, self.lexicon.mapPathHash): calculatedHash.update(hash.digest()) return hashes.newImmutableHash(calculatedHash.hexdigest(), storedHashDataType) def calculateBlockHashesHash(self, imageStreamURI): hash = self.getStoredBlockHashes(imageStreamURI) with self.resolver.AFF4FactoryOpen(imageStreamURI) as imageStream: calculatedBlockHashes = [] for h in hash: calculatedBlockHashes.append(hashes.new(h.hashDataType)) offset = 0 while offset < imageStream.size: imageStream.seek(offset) block = imageStream.Read(imageStream.chunk_size) for i in range(len(hash)): calculatedBlockHashesHash = calculatedBlockHashes[i] hashDataType = hash[i].blockHashAlgo # verify the block hash h = hashes.new(hashDataType) h.update(block) calculatedBlockHash = h.hexdigest() chunkIdx = old_div(offset, imageStream.chunk_size) storedBlockHash = imageStream.readBlockHash(chunkIdx, hashDataType) if calculatedBlockHash != storedBlockHash: self.listener.onInvalidBlockHash( calculatedBlockHash, storedBlockHash.value, imageStreamURI, offset) else: self.listener.onValidBlockHash(calculatedBlockHash) calculatedBlockHashesHash.update(h.digest()) offset = offset + imageStream.chunk_size # we now have the block hashes hash calculated res = [] for i in range(len(hash)): a = hash[i].blockHashAlgo b = calculatedBlockHashes[i].hexdigest() c = hash[i].hashDataType blockHashesHash = BlockHashesHash(a, b, c) res.append(blockHashesHash) return res def getStoredBlockHashes(self, imageStreamURI): hashes = [] for hash in self.resolver.QuerySubjectPredicate(imageStreamURI, self.lexicon.blockHashesHash): blockHashAlgo = hash.datatype digest = hash.value digestDataType = hash.datatype hashes.append(BlockHashesHash(blockHashAlgo, digest, digestDataType)) return hashes def validateBlockHashesHash(self, imageStreamURI): storedHashes = self.getStoredBlockHashes(imageStreamURI) calculatedHashes = self.calculateBlockHashesHash(imageStreamURI) for i in range(len(storedHashes)): a = storedHashes[i] b = calculatedHashes[i] if a != b: self.listener.onInvalidHash("BlockHashesHash", a, b, imageStreamURI) else: self.listener.onValidHash("BlockHashesHash", a, imageStreamURI) def validateMapIdxHash(self, map_uri): for stored_hash in self.resolver.QuerySubjectPredicate( map_uri, self.lexicon.mapIdxHash): return self.validateSegmentHash( map_uri, "mapIdxHash", self.calculateMapIdxHash( map_uri, stored_hash.datatype)) def calculateMapIdxHash(self, mapURI, hashDataType): return self.calculateSegmentHash(mapURI, "idx", hashDataType) def validateMapPointHash(self, map_uri): for stored_hash in self.resolver.QuerySubjectPredicate( map_uri, self.lexicon.mapPointHash): return self.validateSegmentHash( map_uri, "mapPointHash", self.calculateMapPointHash( map_uri, stored_hash.datatype)) def calculateMapPointHash(self, mapURI, storedHashDataType): return self.calculateSegmentHash(mapURI, "map", storedHashDataType) def validateMapPathHash(self, map_uri): for stored_hash in self.resolver.QuerySubjectPredicate( map_uri, self.lexicon.mapPathHash): return self.validateSegmentHash( map_uri, "mapPathHash", self.calculateMapPathHash( map_uri, stored_hash.datatype)) def calculateMapPathHash(self, mapURI, storedHashDataType): return self.calculateSegmentHash(mapURI, "mapPath", storedHashDataType) def validateMapHash(self, map_uri): for stored_hash in self.resolver.QuerySubjectPredicate( map_uri, self.lexicon.mapHash): return self.validateSegmentHash( map_uri, "mapHash", self.calculateMapHash( map_uri, stored_hash.datatype)) def calculateMapHash(self, mapURI, storedHashDataType): calculatedHash = hashes.new(storedHashDataType) calculatedHash.update(self.readSegment(mapURI, "map")) calculatedHash.update(self.readSegment(mapURI, "idx")) try: calculatedHash.update(self.readSegment(mapURI, "mapPath")) except: pass return hashes.newImmutableHash(calculatedHash.hexdigest(), storedHashDataType) def validateSegmentHash(self, mapURI, hashType, calculatedHash): storedHash = next(self.resolver.QuerySubjectPredicate(mapURI, self.lexicon.base + hashType)) if storedHash != calculatedHash: self.listener.onInvalidHash(hashType, storedHash, calculatedHash, mapURI) else: self.listener.onValidHash(hashType, storedHash, mapURI) def readSegment(self, parentURI, subSegment): parentURI = rdfvalue.URN(parentURI) segment_uri = parentURI.Append(subSegment) with self.resolver.AFF4FactoryOpen(segment_uri) as segment: data = segment.Read(segment.Size()) return data def calculateSegmentHash(self, parentURI, subSegment, hashDataType): calculatedHash = hashes.new(hashDataType) data = self.readSegment(parentURI, subSegment) if data != None: calculatedHash.update(data) b = calculatedHash.hexdigest() return hashes.newImmutableHash(b, hashDataType) else: raise Exception def checkSame(self, a, b): if a != b: raise InconsistentHashMethod() # A block hash validator for AFF4 Pre-Standard images produced by Evimetry 1.x-2.1 class PreStdValidator(Validator): def __init__(self, resolver, lex, listener=None): Validator.__init__(self, listener) self.resolver = resolver self.lexicon = lex def validateContainer(self, urn): with zip.ZipFile.NewZipFile(self.resolver, urn) as zip_file: self.doValidateContainer() # pre AFF4 standard Evimetry uses the contains relationship to locate the local # image stream of a Map def findLocalImageStreamOfMap(self, mapStreamURI): imageStreamURI = next(self.resolver.QuerySubjectPredicate(mapStreamURI, self.lexicon.contains)) return imageStreamURI def doValidateContainer(self): types = list(self.resolver.QueryPredicateObject( lexicon.AFF4_TYPE, self.lexicon.Image)) if not types: return imageURI = types[0] # For block based hashing our starting point is the map if self.isMap(imageURI): with self.resolver.AFF4FactoryOpen(imageURI) as mapStream: for target in mapStream.targets: if self.resolver.isImageStream(target): self.validateBlockHashesHash(target) self.validateMapIdxHash(imageURI) self.validateMapPointHash(imageURI) self.validateMapPathHash(imageURI) self.validateMapHash(imageURI) self.validateBlockMapHash(imageURI, target) # in AFF4 pre-standard Evimetry stores what we now call the blockMapHash in the Map, with the # name blockHashesHash def validateBlockMapHash(self, mapStreamURI, imageStreamURI): storedHash = next(self.resolver.QuerySubjectPredicate(mapStreamURI, self.lexicon.blockHashesHash)) calculalatedHash = self.calculateBlockMapHash(mapStreamURI, imageStreamURI, storedHash.datatype) if storedHash != calculalatedHash: self.listener.onInvalidHash("BlockMapHash", storedHash, calculalatedHash, mapStreamURI) else: self.listener.onValidHash("BlockMapHash", storedHash, mapStreamURI) def isMap(self, stream): types = self.resolver.QuerySubjectPredicate(stream, lexicon.AFF4_TYPE) if self.lexicon.map in types: return True return False # A block hash validator for AFF4 Interim Standard images produced by Evimetry 3.0 class InterimStdValidator(Validator): def __init__(self, resolver, lex, listener=None): Validator.__init__(self, listener) self.resolver = resolver self.lexicon = lex def validateContainer(self, urn): with zip.ZipFile.NewZipFile(self.resolver, urn) as zip_file: self.doValidateContainer() def getParentMap(self, imageStreamURI): imageStreamVolume = next(self.resolver.QuerySubjectPredicate(imageStreamURI, self.lexicon.stored)) for map in self.resolver.QuerySubjectPredicate(imageStreamURI, self.lexicon.target): mapVolume = next(self.resolver.QuerySubjectPredicate(map, self.lexicon.stored)) if mapVolume == imageStreamVolume: return map raise Exception("Illegal State") def doValidateContainer(self): # FIXME: This should further restrict by container URN since # the same data store may be used for multiple containers with # many images. for image in self.resolver.QueryPredicateObject( lexicon.AFF4_TYPE, self.lexicon.Image): datastreams = list(self.resolver.QuerySubjectPredicate( image, self.lexicon.dataStream)) calculated_hashes = collections.OrderedDict() hash_datatype = None for stream in datastreams: if self.isMap(stream): for image_stream_uri in self.resolver.QuerySubjectPredicate( stream, self.lexicon.dependentStream): parent_map = self.getParentMap(image_stream_uri) if parent_map == stream: # only validate the map and stream pair in the same container self.validateBlockHashesHash(image_stream_uri) self.validateMapIdxHash(parent_map) self.validateMapPointHash(parent_map) self.validateMapPathHash(parent_map) self.validateMapHash(parent_map) calculated_hash = self.validateBlockMapHash( parent_map, image_stream_uri) calculated_hashes[parent_map] = calculated_hash # Assume all block hashes are the same type. if (hash_datatype is not None and hash_datatype != calculated_hash.datatype): raise AttributeError( "Block hashes are not all the same type.") else: hash_datatype = calculated_hash.datatype for stored_hash in self.resolver.QuerySubjectPredicate( image, self.lexicon.hash): hasha = "" hashb = "" parent_map = None # TODO: handle more cleanly the sematic difference between datatypes if len(calculated_hashes) == 1: # This is a single part image # The single AFF4 hash is just the blockMapHash parent_map, calculated_hash = calculated_hashes.popitem() hasha = stored_hash hashb = calculated_hash else: # This is a multiple part image The single AFF4 # hash is one layer up in the Merkel tree again, # with the subordinate nodes being the # blockMapHashes for the map stored in each # container volume # The hash algorithm we use for the single AFF4 # hash is the same algorithm we use for all of the # Merkel tree inner nodes current_hash = hashes.new(hash_datatype) # FIXME: This is a flaw in the scheme since there # is no reasonable order specified. We temporarily # sort the results to get the test to pass but # this needs to be properly addressed. # We rely on the natural ordering of the map URN's # as they are stored in the map to order the # blockMapHashes in the Merkel tree. for parent_map, calculated_hash in sorted(calculated_hashes.items()): current_hash.update(calculated_hash.digest()) hasha = stored_hash.value hashb = current_hash.hexdigest() if hasha != hashb: self.listener.onInvalidHash("AFF4Hash", hasha, hashb, parent_map) else: self.listener.onValidHash("AFF4Hash", hasha, parent_map) def getStoredBlockHashes(self, image_stream_uri): res = [] for block_hash_uri in self.resolver.SelectSubjectsByPrefix(str(image_stream_uri) + "/blockhash."): for hash in self.resolver.QuerySubjectPredicate(block_hash_uri, self.lexicon.hash): extension = block_hash_uri.Parse().path.split(".")[-1] block_hash_algo_type = hashes.fromShortName(extension) hash = BlockHashesHash(block_hash_algo_type, hash.value, hash.datatype) res.append(hash) return res def isMap(self, stream): types = self.resolver.QuerySubjectPredicate(stream, lexicon.AFF4_TYPE) if self.lexicon.map in types: return True return False pyaff4-0.26.post6/pyaff4/zip.py0000664000175000017500000010160013211617552016546 0ustar rhertzogrhertzog# Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. """An implementation of the ZipFile based AFF4 volume.""" from __future__ import unicode_literals from future import standard_library standard_library.install_aliases() from builtins import range from builtins import object import copy import logging import io import zlib import struct from pyaff4 import aff4 from pyaff4 import aff4_file from pyaff4 import aff4_utils from pyaff4 import lexicon from pyaff4 import rdfvalue from pyaff4 import registry from pyaff4 import struct_parser from pyaff4 import utils LOGGER = logging.getLogger("pyaff4") # Compression modes we support inside Zip files (Note this is not the same as # the aff4_image compression. ZIP_STORED = 0 ZIP_DEFLATE = 8 # The field size at which we switch to zip64 semantics. ZIP32_MAX_SIZE = 2**32 -1 BUFF_SIZE = 64 * 1024 class EndCentralDirectory(struct_parser.CreateStruct( "EndCentralDirectory_t", definition=""" uint32_t magic = 0x6054b50; uint16_t number_of_this_disk = 0; uint16_t disk_with_cd = 0; uint16_t total_entries_in_cd_on_disk; uint16_t total_entries_in_cd; uint32_t size_of_cd = 0xFFFFFFFF; uint32_t offset_of_cd = 0xFFFFFFFF; uint16_t comment_len = 0; """)): magic_string = b'PK\x05\x06' def IsValid(self): return self.magic == 0x6054b50 @classmethod def FromBuffer(cls, buffer): """Instantiate an EndCentralDirectory from this buffer.""" # Not enough data to contain an EndCentralDirectory if len(buffer) > cls.sizeof(): # Scan the buffer backwards for an End of Central Directory magic end = len(buffer) - cls.sizeof() + 4 while True: index = buffer.rfind(cls.magic_string, 0, end) if index < 0: break end_cd = cls(buffer[index:]) if end_cd.IsValid(): return end_cd, index end = index raise IOError("Unable to find EndCentralDirectory") class CDFileHeader(struct_parser.CreateStruct( "CDFileHeader_t", """ uint32_t magic = 0x2014b50; uint16_t version_made_by = 0x317; uint16_t version_needed = 0x14; uint16_t flags = 0x8; uint16_t compression_method; uint16_t dostime; uint16_t dosdate; uint32_t crc32; uint32_t compress_size = 0xFFFFFFFF; uint32_t file_size = 0xFFFFFFFF; uint16_t file_name_length; uint16_t extra_field_len = 0; uint16_t file_comment_length = 0; uint16_t disk_number_start = 0; uint16_t internal_file_attr = 0; uint32_t external_file_attr = 0o644 << 16L; uint32_t relative_offset_local_header = 0xffffffff; """)): def IsValid(self): return self.magic == 0x2014b50 class ZipFileHeader(struct_parser.CreateStruct( "ZipFileHeader_t", """ uint32_t magic = 0x4034b50; uint16_t version = 0x14; uint16_t flags = 0x8; uint16_t compression_method; uint16_t lastmodtime; uint16_t lastmoddate; uint32_t crc32; int32_t compress_size; int32_t file_size; uint16_t file_name_length; uint16_t extra_field_len = 0; """)): def IsValid(self): return self.magic == 0x4034b50 # see APPNOTE.txt 4.5.3 -Zip64 Extended Information Extra Field (0x0001): class Zip64FileHeaderExtensibleField(object): fields = [ ["uint16_t", "header_id", 1], ["uint16_t", "data_size", 0], ["uint64_t", "file_size", None], ["uint64_t", "compress_size", None], ["uint64_t", "relative_offset_local_header", None], ["uint32_t", "disk_number_start", None] ] def __init__(self): self.fields = copy.deepcopy(self.fields) def format_string(self): return "<" + "".join( [struct_parser.format_string_map[t] for t, _, d in self.fields if d is not None]) def sizeof(self): """Calculate the total size of the header.""" return struct.calcsize(self.format_string()) def empty(self): return [] == [d for _, _, d in self.fields[2:] if d is not None] def Pack(self): self.data_size = self.sizeof() return struct.pack(self.format_string(), *[v for t, _, v in self.fields if v is not None]) def Get(self, field): for row in self.fields: if row[1] == field: return row[2] raise AttributeError("Unknown field %s." % field) def Set(self, field, value): for row in self.fields: if row[1] == field: row[2] = value return raise AttributeError("Unknown field %s." % field) @classmethod def FromBuffer(cls, fileRecord, buffer): result = cls() result.header_id = struct.unpack("H", buffer[0:2])[0] if result.header_id != 1: raise IOError("Invalid Zip64 Extended Information Extra Field") result.data_size = struct.unpack("H", buffer[2:4])[0] offset = 4 if fileRecord.file_size == 0xFFFFFFFF: result.Set("file_size", struct.unpack("Q", buffer[offset:offset + 8])[0]) offset += 8 if fileRecord.compress_size == 0xFFFFFFFF: result.Set("compress_size", struct.unpack("Q", buffer[offset:offset + 8])[0]) offset += 8 if fileRecord.relative_offset_local_header == 0xFFFFFFFF: result.Set("relative_offset_local_header", struct.unpack("Q", buffer[offset:offset + 8])[0]) offset += 8 if fileRecord.disk_number_start == 0xFFFF: result.Set("disk_number_start", struct.unpack("I", buffer[offset:offset + 4])[0]) offset += 4 return (result, offset) class Zip64EndCD(struct_parser.CreateStruct( "Zip64EndCD_t", """ uint32_t magic = 0x06064b50; uint64_t size_of_header = 0; uint16_t version_made_by = 0x2d; uint16_t version_needed = 0x2d; uint32_t number_of_disk = 0; uint32_t number_of_disk_with_cd = 0; uint64_t number_of_entries_in_volume; uint64_t total_entries_in_cd; uint64_t size_of_cd; uint64_t offset_of_cd; """)): magic_string = b'PK\x06\x06' def IsValid(self): return self.magic == 0x06064b50 @classmethod def FromBuffer(cls, buffer): """Instantiate an EndCentralDirectory from this buffer.""" # Not enough data to contain an EndCentralDirectory if len(buffer) > cls.sizeof(): # Scan the buffer backwards for an End of Central Directory magic end = len(buffer) - cls.sizeof() + 4 while True: index = buffer.rfind(cls.magic_string, 0, end) if index < 0: break end_cd = cls(buffer[index:]) if end_cd.IsValid(): return end_cd, index end = index raise IOError("Unable to find EndCentralDirectory") class Zip64CDLocator(struct_parser.CreateStruct( "Zip64CDLocator_t", """ uint32_t magic = 0x07064b50; uint32_t disk_with_cd = 0; uint64_t offset_of_end_cd; uint32_t number_of_disks = 1; """)): def IsValid(self): return (self.magic == 0x07064b50 and self.disk_with_cd == 0 and self.number_of_disks == 1) class ZipInfo(object): def __init__(self, compression_method=0, compress_size=0, file_size=0, filename="", local_header_offset=0, crc32=0, lastmoddate=0, lastmodtime=0): self.compression_method = compression_method self.compress_size = compress_size self.file_size = file_size self.filename = filename self.local_header_offset = local_header_offset self.crc32 = crc32 self.lastmoddate = lastmoddate self.lastmodtime = lastmodtime self.file_header_offset = None def WriteFileHeader(self, backing_store): if self.file_header_offset is None: self.file_header_offset = backing_store.Tell() header = ZipFileHeader( crc32=self.crc32, compress_size=self.compress_size, file_size=self.file_size, file_name_length=len(self.filename), compression_method=self.compression_method, lastmodtime=self.lastmodtime, lastmoddate=self.lastmoddate, extra_field_len=0) extra_header_64 = Zip64FileHeaderExtensibleField() if self.file_size > ZIP32_MAX_SIZE: header.file_size = 0xFFFFFFFF extra_header_64.Set("file_size", self.file_size) if self.compress_size > ZIP32_MAX_SIZE: header.compress_size = 0xFFFFFFFF extra_header_64.Set("compress_size", self.compress_size) # Only write the extra header if we have to. if not extra_header_64.empty(): header.extra_field_len = extra_header_64.sizeof() backing_store.Seek(self.file_header_offset) backing_store.Write(header.Pack()) backing_store.write(utils.SmartStr(self.filename)) if not extra_header_64.empty(): backing_store.Write(extra_header_64.Pack()) def WriteCDFileHeader(self, backing_store): header = CDFileHeader( compression_method=self.compression_method, file_size=self.file_size, compress_size=self.compress_size, relative_offset_local_header=self.local_header_offset, crc32=self.crc32, file_name_length=len(self.filename), dostime=self.lastmodtime, dosdate=self.lastmoddate) extra_header_64 = Zip64FileHeaderExtensibleField() if self.file_size > ZIP32_MAX_SIZE: header.file_size = 0xFFFFFFFF extra_header_64.Set("file_size", self.file_size) if self.compress_size > ZIP32_MAX_SIZE: header.compress_size = 0xFFFFFFFF extra_header_64.Set("compress_size", self.compress_size) if self.local_header_offset > ZIP32_MAX_SIZE: header.relative_offset_local_header = 0xFFFFFFFF extra_header_64.Set("relative_offset_local_header", self.local_header_offset) # Only write the extra header if we have to. if not extra_header_64.empty(): header.extra_field_len = extra_header_64.sizeof() backing_store.write(header.Pack()) backing_store.write(utils.SmartStr(self.filename)) if not extra_header_64.empty(): backing_store.write(extra_header_64.Pack()) class FileWrapper(object): """Maps a slice from a file URN.""" def __init__(self, resolver, file_urn, slice_offset, slice_size): self.file_urn = file_urn self.resolver = resolver self.slice_size = slice_size self.slice_offset = slice_offset self.readptr = 0 def seek(self, offset, whence=0): if whence == 0: self.readptr = offset elif whence == 1: self.readptr += offset elif whence == 2: self.readptr = self.slice_size + offset def tell(self): return self.readptr def read(self, length): with self.resolver.AFF4FactoryOpen(self.file_urn) as fd: fd.seek(self.slice_offset + self.readptr) to_read = min(self.slice_size - self.readptr, length) result = fd.read(to_read) self.readptr += len(result) return result def DecompressBuffer(buffer): """Decompress using deflate a single buffer. We assume the buffer is not too large. """ decompressor = zlib.decompressobj(-15) result = decompressor.decompress(buffer, len(buffer)) return result + decompressor.flush() class ZipFileSegment(aff4_file.FileBackedObject): compression_method = ZIP_STORED def LoadFromURN(self): owner_urn = self.resolver.Get(self.urn, lexicon.AFF4_STORED) with self.resolver.AFF4FactoryOpen(owner_urn) as owner: self.LoadFromZipFile(owner) def LoadFromZipFile(self, owner): """Read the segment data from the ZipFile owner.""" # Parse the ZipFileHeader for this filename. zip_info = owner.members.get(self.urn) if zip_info is None: # The owner does not have this file yet - we add it when closing. self.fd = io.BytesIO() return backing_store_urn = owner.backing_store_urn with self.resolver.AFF4FactoryOpen(backing_store_urn) as backing_store: backing_store.Seek( zip_info.local_header_offset + owner.global_offset, 0) file_header = ZipFileHeader( backing_store.Read(ZipFileHeader.sizeof())) if not file_header.IsValid(): raise IOError("Local file header invalid!") # The filename should be null terminated. file_header_filename = backing_store.Read( file_header.file_name_length).split(b"\x00")[0] if file_header_filename != zip_info.filename: msg = (u"Local filename %s different from " u"central directory %s.") % ( file_header_filename, zip_info.filename) LOGGER.error(msg) raise IOError(msg) backing_store.Seek(file_header.extra_field_len, aff4.SEEK_CUR) buffer_size = zip_info.file_size if file_header.compression_method == ZIP_DEFLATE: # We write the entire file in a memory buffer if we need to # deflate it. self.compression_method = ZIP_DEFLATE c_buffer = backing_store.Read(zip_info.compress_size) decomp_buffer = DecompressBuffer(c_buffer) if len(decomp_buffer) != buffer_size: LOGGER.info("Unable to decompress file %s", self.urn) raise IOError() self.fd = io.BytesIO(decomp_buffer) elif file_header.compression_method == ZIP_STORED: # Otherwise we map a slice into it. self.fd = FileWrapper(self.resolver, backing_store_urn, backing_store.Tell(), buffer_size) else: LOGGER.info("Unsupported compression method.") raise NotImplementedError() def WriteStream(self, stream, progress=None): owner_urn = self.resolver.Get(self.urn, lexicon.AFF4_STORED) with self.resolver.AFF4FactoryOpen(owner_urn) as owner: owner.StreamAddMember( self.urn, stream, compression_method=self.compression_method, progress=progress) def Flush(self): if self.IsDirty(): owner_urn = self.resolver.Get(self.urn, lexicon.AFF4_STORED) with self.resolver.AFF4FactoryOpen(owner_urn) as owner: self.Seek(0) # Copy ourselves into the owner. owner.StreamAddMember( self.urn, self, self.compression_method) super(ZipFileSegment, self).Flush() class ZipFile(aff4.AFF4Volume): def __init__(self, *args, **kwargs): super(ZipFile, self).__init__(*args, **kwargs) self.children = set() # The members of this zip file. Keys is member URN, value is zip info. self.members = {} self.global_offset = 0 def parse_cd(self, backing_store_urn): with self.resolver.AFF4FactoryOpen(backing_store_urn) as backing_store: # Find the End of Central Directory Record - We read about 4k of # data and scan for the header from the end, just in case there is # an archive comment appended to the end. backing_store.Seek(-BUFF_SIZE, 2) ecd_real_offset = backing_store.Tell() buffer = backing_store.Read(BUFF_SIZE) end_cd, buffer_offset = EndCentralDirectory.FromBuffer(buffer) urn_string = None ecd_real_offset += buffer_offset # Fetch the volume comment. if end_cd.comment_len > 0: backing_store.Seek(ecd_real_offset + end_cd.sizeof()) urn_string = backing_store.Read(end_cd.comment_len) LOGGER.info("Loaded AFF4 volume URN %s from zip file.", urn_string) #if end_cd.size_of_cd == 0xFFFFFFFF: # end_cd, buffer_offset = Zip64EndCD.FromBuffer(buffer) #LOGGER.info("Found ECD at %#x", ecd_real_offset) # There is a catch 22 here - before we parse the ZipFile we dont # know the Volume's URN, but we need to know the URN so the # AFF4FactoryOpen() can open it. Therefore we start with a random # URN and then create a new ZipFile volume. After parsing the # central directory we discover our URN and therefore we can delete # the old, randomly selected URN. if urn_string and self.urn != urn_string: self.resolver.DeleteSubject(self.urn) self.urn.Set(utils.SmartUnicode(urn_string)) # Set these triples so we know how to open the zip file again. self.resolver.Set(self.urn, lexicon.AFF4_TYPE, rdfvalue.URN( lexicon.AFF4_ZIP_TYPE)) self.resolver.Set(self.urn, lexicon.AFF4_STORED, rdfvalue.URN( backing_store_urn)) self.resolver.Set(backing_store_urn, lexicon.AFF4_CONTAINS, self.urn) directory_offset = end_cd.offset_of_cd directory_number_of_entries = end_cd.total_entries_in_cd # Traditional zip file - non 64 bit. if directory_offset > 0 and directory_offset != 0xffffffff: # The global difference between the zip file offsets and real # file offsets. This is non zero when the zip file was appended # to another file. self.global_offset = ( # Real ECD offset. ecd_real_offset - end_cd.size_of_cd - # Claimed CD offset. directory_offset) LOGGER.info("Global offset: %#x", self.global_offset) # This is a 64 bit archive, find the Zip64EndCD. else: locator_real_offset = ecd_real_offset - Zip64CDLocator.sizeof() backing_store.Seek(locator_real_offset, 0) locator = Zip64CDLocator( backing_store.Read(Zip64CDLocator.sizeof())) if not locator.IsValid(): raise IOError("Zip64CDLocator invalid or not supported.") # Although it may appear that we can use the Zip64CDLocator to # locate the Zip64EndCD record via it's offset_of_cd record this # is not quite so. If the zip file was appended to another file, # the offset_of_cd field will not be valid, as it still points # to the old offset. In this case we also need to know the # global shift. backing_store.Seek( locator_real_offset - Zip64EndCD.sizeof(), 0) end_cd = Zip64EndCD( backing_store.Read(Zip64EndCD.sizeof())) if not end_cd.IsValid(): LOGGER.error("Zip64EndCD magic not correct @%#x", locator_real_offset - Zip64EndCD.sizeof()) raise RuntimeError("Zip64EndCD magic not correct") directory_offset = end_cd.offset_of_cd directory_number_of_entries = end_cd.number_of_entries_in_volume # The global offset is now known: self.global_offset = ( # Real offset of the central directory. locator_real_offset - Zip64EndCD.sizeof() - end_cd.size_of_cd - # The directory offset in zip file offsets. directory_offset) LOGGER.info("Global offset: %#x", self.global_offset) # Now iterate over the directory and read all the ZipInfo structs. entry_offset = directory_offset for _ in range(directory_number_of_entries): backing_store.Seek(entry_offset + self.global_offset, 0) entry = CDFileHeader( backing_store.Read(CDFileHeader.sizeof())) if not entry.IsValid(): LOGGER.info( "CDFileHeader at offset %#x invalid", entry_offset) raise RuntimeError() zip_info = ZipInfo( filename=backing_store.Read(entry.file_name_length), local_header_offset=entry.relative_offset_local_header, compression_method=entry.compression_method, compress_size=entry.compress_size, file_size=entry.file_size, crc32=entry.crc32, lastmoddate=entry.dosdate, lastmodtime=entry.dostime) # Zip64 local header - parse the Zip64 extended information extra field. # This field isnt a struct, its a serialization #if zip_info.local_header_offset < 0 or zip_info.local_header_offset == 0xffffffff: if entry.extra_field_len > 0: extrabuf = backing_store.Read(entry.extra_field_len) extra, readbytes = Zip64FileHeaderExtensibleField.FromBuffer( entry, extrabuf) extrabuf = extrabuf[readbytes:] if extra.header_id == 1: if extra.Get("relative_offset_local_header") is not None: zip_info.local_header_offset = ( extra.Get("relative_offset_local_header")) if extra.Get("file_size") is not None: zip_info.file_size = extra.Get("file_size") if extra.Get("compress_size") is not None: zip_info.compress_size = extra.Get("compress_size") #break LOGGER.info("Found file %s @ %#x", zip_info.filename, zip_info.local_header_offset) # Store this information in the resolver. Ths allows # segments to be directly opened by URN. member_urn = aff4_utils.urn_from_member_name( zip_info.filename, self.urn) self.resolver.Set( member_urn, lexicon.AFF4_TYPE, rdfvalue.URN( lexicon.AFF4_ZIP_SEGMENT_TYPE)) self.resolver.Set(member_urn, lexicon.AFF4_STORED, self.urn) self.resolver.Set(member_urn, lexicon.AFF4_STREAM_SIZE, rdfvalue.XSDInteger(zip_info.file_size)) self.members[member_urn] = zip_info # Go to the next entry. entry_offset += (entry.sizeof() + entry.file_name_length + entry.extra_field_len + entry.file_comment_length) @staticmethod def NewZipFile(resolver, backing_store_urn): rdfvalue.AssertURN(backing_store_urn) result = ZipFile(resolver, urn=None) resolver.Set(result.urn, lexicon.AFF4_TYPE, rdfvalue.URN(lexicon.AFF4_ZIP_TYPE)) resolver.Set(result.urn, lexicon.AFF4_STORED, rdfvalue.URN(backing_store_urn)) return resolver.AFF4FactoryOpen(result.urn) def CreateMember(self, child_urn): member_filename = aff4_utils.member_name_for_urn(child_urn, self.urn) return self.CreateZipSegment(member_filename) def CreateZipSegment(self, filename): self.MarkDirty() segment_urn = aff4_utils.urn_from_member_name(filename, self.urn) # Is it in the cache? res = self.resolver.CacheGet(segment_urn) if res: return res self.resolver.Set( segment_urn, lexicon.AFF4_TYPE, rdfvalue.URN(lexicon.AFF4_ZIP_SEGMENT_TYPE)) self.resolver.Set(segment_urn, lexicon.AFF4_STORED, self.urn) # Keep track of all the segments we issue. self.children.add(segment_urn) result = ZipFileSegment(resolver=self.resolver, urn=segment_urn) result.LoadFromZipFile(self) LOGGER.info("Creating ZipFileSegment %s", result.urn.SerializeToString()) # Add the new object to the object cache. return self.resolver.CachePut(result) def OpenZipSegment(self, filename): # Is it already in the cache? segment_urn = aff4_utils.urn_from_member_name(filename, self.urn) if segment_urn not in self.members: raise IOError("Segment %s does not exist yet" % filename) res = self.resolver.CacheGet(segment_urn) if res: LOGGER.info("Openning ZipFileSegment (cached) %s", res.urn) return res result = ZipFileSegment(resolver=self.resolver, urn=segment_urn) result.LoadFromZipFile(owner=self) LOGGER.info("Openning ZipFileSegment %s", result.urn) return self.resolver.CachePut(result) def LoadFromURN(self): self.backing_store_urn = self.resolver.Get( self.urn, lexicon.AFF4_STORED) if not self.backing_store_urn: raise IOError("Unable to load backing urn.") try: self.parse_cd(self.backing_store_urn) except IOError: # If we can not parse a CD from the zip file, this is fine, we just # append an AFF4 volume to it, or make a new file. return # Load the turtle metadata. with self.OpenZipSegment("information.turtle") as fd: self.resolver.LoadFromTurtle(fd) def StreamAddMember(self, member_urn, stream, compression_method=ZIP_STORED, progress=None): """An efficient interface to add a new archive member. Args: member_urn: The new member URN to be added. stream: A file-like object (with read() method) that generates data to be written as the member. compression_method: How to compress the member. """ if progress is None: progress = aff4.EMPTY_PROGRESS backing_store_urn = self.resolver.Get(self.urn, lexicon.AFF4_STORED) with self.resolver.AFF4FactoryOpen(backing_store_urn) as backing_store: LOGGER.info("Writing member %s", member_urn) # Append member at the end of the file. backing_store.Seek(0, aff4.SEEK_END) # zip_info offsets are relative to the start of the zip file (take # global_offset into account). zip_info = ZipInfo( local_header_offset=backing_store.Tell() - self.global_offset, filename=aff4_utils.member_name_for_urn(member_urn, self.urn), file_size=0, crc32=0, compression_method=compression_method) # For now we do not support streamed writing so we need to seek back # to this position later with an updated crc32. zip_info.WriteFileHeader(backing_store) if compression_method == ZIP_DEFLATE: zip_info.compression_method = ZIP_DEFLATE compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) while True: data = stream.read(BUFF_SIZE) if not data: break c_data = compressor.compress(data) zip_info.compress_size += len(c_data) zip_info.file_size += len(data) # Python 2 erronously returns a signed int here. zip_info.crc32 = zlib.crc32(data, zip_info.crc32) & 0xffffffff backing_store.Write(c_data) progress.Report(zip_info.file_size) # Finalize the compressor. c_data = compressor.flush() zip_info.compress_size += len(c_data) backing_store.Write(c_data) # Just write the data directly. elif compression_method == ZIP_STORED: zip_info.compression_method = ZIP_STORED while True: data = stream.read(BUFF_SIZE) if not data: break zip_info.compress_size += len(data) zip_info.file_size += len(data) # Python 2 erronously returns a signed int here. zip_info.crc32 = zlib.crc32(data, zip_info.crc32) & 0xffffffff progress.Report(zip_info.file_size) backing_store.Write(data) else: raise RuntimeError("Unsupported compression method") # Update the local file header now that CRC32 is calculated. zip_info.WriteFileHeader(backing_store) self.members[member_urn] = zip_info def Flush(self): # If the zip file was changed, re-write the central directory. if self.IsDirty(): # First Flush all our children, but only if they are still in the # cache. while len(self.children): for child in list(self.children): with self.resolver.CacheGet(child) as obj: obj.Flush() self.children.remove(child) # Add the turtle file to the volume. with self.CreateZipSegment(u"information.turtle") as turtle_segment: turtle_segment.compression_method = ZIP_DEFLATE self.resolver.DumpToTurtle(stream=turtle_segment) turtle_segment.Flush() # Write the central directory. self.write_zip64_CD() super(ZipFile, self).Flush() def write_zip64_CD(self): backing_store_urn = self.resolver.Get(self.urn, lexicon.AFF4_STORED) with self.resolver.AFF4FactoryOpen(backing_store_urn) as backing_store: # We write to a memory stream first, and then copy it into the # backing_store at once. This really helps when we have lots of # members in the zip archive. cd_stream = io.BytesIO() # Append a new central directory to the end of the zip file. backing_store.Seek(0, aff4.SEEK_END) # The real start of the ECD. ecd_real_offset = backing_store.Tell() total_entries = len(self.members) for urn, zip_info in list(self.members.items()): LOGGER.info("Writing CD entry for %s", urn) zip_info.WriteCDFileHeader(cd_stream) locator = Zip64CDLocator( offset_of_end_cd=(cd_stream.tell() + ecd_real_offset - self.global_offset)) size_of_cd = cd_stream.tell() end_cd = Zip64EndCD( size_of_header=Zip64EndCD.sizeof()-12, number_of_entries_in_volume=total_entries, number_of_entries_in_total=total_entries, size_of_cd=size_of_cd, offset_of_cd=locator.offset_of_end_cd - size_of_cd) urn_string = self.urn.SerializeToString() end = EndCentralDirectory( total_entries_in_cd_on_disk=len(self.members), total_entries_in_cd=len(self.members), comment_len=len(urn_string)) LOGGER.info("Writing Zip64EndCD at %#x", cd_stream.tell() + ecd_real_offset) cd_stream.write(end_cd.Pack()) cd_stream.write(locator.Pack()) LOGGER.info("Writing ECD at %#x", cd_stream.tell() + ecd_real_offset) cd_stream.write(end.Pack()) cd_stream.write(urn_string) # Now copy the cd_stream into the backing_store in one write # operation. backing_store.write(cd_stream.getvalue()) registry.AFF4_TYPE_MAP[lexicon.AFF4_ZIP_TYPE] = ZipFile registry.AFF4_TYPE_MAP[lexicon.AFF4_ZIP_SEGMENT_TYPE] = ZipFileSegment pyaff4-0.26.post6/pyaff4/data_store.py0000664000175000017500000003704513211617552020104 0ustar rhertzogrhertzogfrom __future__ import print_function from __future__ import unicode_literals # Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from builtins import str from builtins import object import collections import logging import rdflib import re import six from pyaff4 import aff4 from pyaff4 import lexicon from pyaff4 import rdfvalue from pyaff4 import registry from pyaff4 import stream_factory from pyaff4 import utils LOGGER = logging.getLogger("pyaff4") def CHECK(condition, error): if not condition: raise RuntimeError(error) class AFF4ObjectCacheEntry(object): def __init__(self, key, aff4_obj): self.next = self.prev = self self.key = key self.aff4_obj = aff4_obj self.use_count = 0 def unlink(self): self.next.prev = self.prev self.prev.next = self.next self.next = self.prev = self def append(self, entry): CHECK(entry.next == entry.prev, "Appending an element already in the list") entry.next = self.next self.next.prev = entry entry.prev = self self.next = entry def __iter__(self): entry = self.next while entry != self: yield entry entry = entry.next class AFF4ObjectCache(object): def __init__(self, max_items): self.max_items = max_items self.in_use = {} self.lru_map = {} self.lru_list = AFF4ObjectCacheEntry(None, None) self.volume_file_map = {} def _Trim(self, size=None): max_items = size or self.max_items while len(self.lru_map) > max_items: older_item = self.lru_list.prev LOGGER.debug("Trimming %s from cache" % older_item.key) self.lru_map.pop(older_item.key) older_item.unlink() # Ensure we flush the trimmed objects. older_item.aff4_obj.Flush() def Put(self, aff4_obj, in_use_state=False): key = aff4_obj.urn.SerializeToString() CHECK(key not in self.in_use, "Object %s Put in cache while already in use." % key) CHECK(key not in self.lru_map, "Object %s Put in cache while already in cache." % key) entry = AFF4ObjectCacheEntry(key, aff4_obj) if in_use_state: entry.use_count = 1 self.in_use[key] = entry return self.lru_list.append(entry) self.lru_map[key] = entry self._Trim() def Get(self, urn): key = rdfvalue.URN(urn).SerializeToString() entry = self.in_use.get(key) if entry is not None: entry.use_count += 1 return entry.aff4_obj # Hold onto the entry. entry = self.lru_map.pop(key, None) if entry is None: return None entry.use_count = 1 # Remove it from the LRU list. entry.unlink() self.in_use[key] = entry return entry.aff4_obj def Return(self, aff4_obj): key = aff4_obj.urn.SerializeToString() entry = self.in_use.get(key) CHECK(entry is not None, "Object %s Returned to cache, but it is not in use!" % key) CHECK(entry.use_count > 0, "Returned object %s is not used." % key) entry.use_count -= 1 if entry.use_count == 0: self.lru_list.append(entry) self.lru_map[key] = entry self.in_use.pop(key) self._Trim() def Remove(self, aff4_obj): key = aff4_obj.urn.SerializeToString() entry = self.lru_map.pop(key, None) if entry is not None: entry.unlink() entry.aff4_obj.Flush() return # Is the item in use? entry = self.in_use.pop(key, None) if entry is not None: entry.unlink() entry.aff4_obj.Flush() return CHECK(False, "Object %s removed from cache, but was never there." % key) def Dump(self): # Now dump the objects in use. print("Objects in use:") for key, entry in list(self.in_use.items()): print("%s - %s" % (key, entry.use_count)) print("Objects in cache:") for entry in self.lru_list: print("%s - %s" % (entry.key, entry.use_count)) def Flush(self): # It is an error to flush the object cache while there are still items # in use. if len(self.in_use): self.Dump() CHECK(len(self.in_use) == 0, "ObjectCache flushed while some objects in use!") # First flush all objects without deleting them since some flushed # objects may still want to use other cached objects. It is also # possible that new objects are added during object deletion. Therefore # we keep doing it until all objects are clean. while 1: dirty_objects_found = False for it in self.lru_list: if it.aff4_obj.IsDirty(): dirty_objects_found = True it.aff4_obj.Flush() if not dirty_objects_found: break # Now delete all entries. for it in list(self.lru_map.values()): it.unlink() # Clear the map. self.lru_map.clear() class MemoryDataStore(object): aff4NS = None def __init__(self, lex=lexicon.standard): self.lexicon = lex self.suppressed_rdftypes = dict() self.suppressed_rdftypes[lexicon.AFF4_ZIP_SEGMENT_TYPE] = set(( lexicon.AFF4_STORED, lexicon.AFF4_TYPE)) self.suppressed_rdftypes[lexicon.AFF4_ZIP_TYPE] = set(( lexicon.AFF4_STORED, lexicon.AFF4_TYPE)) self.store = collections.OrderedDict() self.ObjectCache = AFF4ObjectCache(10) self.flush_callbacks = {} if self.lexicon == lexicon.legacy: self.streamFactory = stream_factory.PreStdStreamFactory( self, self.lexicon) else: self.streamFactory = stream_factory.StdStreamFactory( self, self.lexicon) def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): self.Flush() def Flush(self): # Flush and expunge the cache. self.ObjectCache.Flush() for cb in list(self.flush_callbacks.values()): cb() def DeleteSubject(self, subject): self.store.pop(rdfvalue.URN(subject), None) def Add(self, subject, attribute, value): subject = rdfvalue.URN(subject).SerializeToString() attribute = rdfvalue.URN(attribute).SerializeToString() CHECK(isinstance(value, rdfvalue.RDFValue), "Value must be an RDFValue") if attribute not in self.store.setdefault( subject, collections.OrderedDict()): self.store.get(subject)[attribute] = value else: oldvalue = self.store.get(subject)[attribute] t = type(oldvalue) if t != type([]): self.store.get(subject)[attribute] = [oldvalue, value] else: self.store.get(subject)[attribute].append(value) def Set(self, subject, attribute, value): subject = rdfvalue.URN(subject).SerializeToString() attribute = rdfvalue.URN(attribute).SerializeToString() CHECK(isinstance(value, rdfvalue.RDFValue), "Value must be an RDFValue") self.store.setdefault(subject, {})[attribute] = value def Get(self, subject, attribute): subject = rdfvalue.URN(subject).SerializeToString() attribute = rdfvalue.URN(attribute).SerializeToString() return self.store.get(subject, {}).get(attribute) def CacheGet(self, urn): result = self.ObjectCache.Get(urn) if result is None: result = aff4.NoneObject("Not present") return result def CachePut(self, obj): self.ObjectCache.Put(obj, True) return obj def Return(self, obj): #LOGGER.debug("Returning %s" % obj.urn) self.ObjectCache.Return(obj) def Close(self, obj): self.ObjectCache.Remove(obj) def DumpToTurtle(self, stream=None, verbose=False): g = rdflib.Graph() for urn, items in self.store.items(): urn = rdflib.URIRef(utils.SmartUnicode(urn)) type = items.get(utils.SmartStr(lexicon.AFF4_TYPE)) if type is None: continue for attr, value in list(items.items()): attr = utils.SmartUnicode(attr) # We suppress certain facts which can be deduced from the file # format itself. This ensures that we do not have conflicting # data in the data store. The data in the data store is a # combination of explicit facts and implied facts. if not verbose: if attr.startswith(lexicon.AFF4_VOLATILE_NAMESPACE): continue if attr in self.suppressed_rdftypes.get(type, ()): continue attr = rdflib.URIRef(attr) if not isinstance(value, list): value = [value] for item in value: g.add((urn, attr, item.GetRaptorTerm())) result = g.serialize(format='turtle') if stream: stream.write(result) return result def LoadFromTurtle(self, stream): data = stream.read(1000000) g = rdflib.Graph() g.parse(data=data, format="turtle") for urn, attr, value in g: urn = utils.SmartUnicode(urn) attr = utils.SmartUnicode(attr) serialized_value = value if isinstance(value, rdflib.URIRef): value = rdfvalue.URN(utils.SmartUnicode(serialized_value)) elif value.datatype in registry.RDF_TYPE_MAP: dt = value.datatype value = registry.RDF_TYPE_MAP[value.datatype]( serialized_value) else: # Default to a string literal. value = rdfvalue.XSDString(value) self.Add(urn, attr, value) # look for the AFF4 namespace defined in the turtle for (_, b) in g.namespace_manager.namespaces(): if (str(b) == lexicon.AFF4_NAMESPACE or str(b) == lexicon.AFF4_LEGACY_NAMESPACE): self.aff4NS = b def AFF4FactoryOpen(self, urn): urn = rdfvalue.URN(urn) # Is the object cached? cached_obj = self.ObjectCache.Get(urn) if cached_obj: cached_obj.Prepare() #LOGGER.debug("AFF4FactoryOpen (Cached): %s" % urn) return cached_obj if self.streamFactory.isSymbolicStream(urn): obj = self.streamFactory.createSymbolic(urn) else: uri_types = self.Get(urn, lexicon.AFF4_TYPE) handler = None # TODO: this could be cleaner. RDF properties have multiple values if type(uri_types) == type([]): for typ in uri_types: if typ in registry.AFF4_TYPE_MAP: handler = registry.AFF4_TYPE_MAP.get(typ) break else: handler = registry.AFF4_TYPE_MAP.get(uri_types) if handler is None: # Try to instantiate the handler based on the URN scheme alone. components = urn.Parse() handler = registry.AFF4_TYPE_MAP.get(components.scheme) if handler is None: raise IOError("Unable to create object %s" % urn) obj = handler(resolver=self, urn=urn) obj.LoadFromURN() # Cache the object for next time. self.ObjectCache.Put(obj, True) #LOGGER.debug("AFF4FactoryOpen (new instance): %s" % urn) obj.Prepare() return obj def Dump(self, verbose=False): print(self.DumpToTurtle(verbose=verbose)) self.ObjectCache.Dump() def isImageStream(self, subject): try: po = self.store[subject] if po == None: return False else: o = po[lexicon.AFF4_TYPE] if o == None: return False else: if type(o) == type([]): for ent in o: if ent.value == lexicon.AFF4_LEGACY_IMAGE_TYPE or ent.value == lexicon.AFF4_IMAGE_TYPE : return True return False else: if o.value == lexicon.AFF4_LEGACY_IMAGE_TYPE or o.value == lexicon.AFF4_IMAGE_TYPE : return True else: return False except: return False def QuerySubject(self, subject_regex=None): subject_regex = re.compile(utils.SmartStr(subject_regex)) for subject in self.store: if subject_regex is not None and subject_regex.match(subject): yield rdfvalue.URN().UnSerializeFromString(subject) def QueryPredicate(self, predicate): """Yields all subjects which have this predicate.""" predicate = utils.SmartStr(predicate) for subject, data in six.iteritems(self.store): for pred, values in six.iteritems(data): if pred == predicate: if type(values) != type([]): values = [values] for value in values: yield (rdfvalue.URN().UnSerializeFromString(subject), rdfvalue.URN().UnSerializeFromString(predicate), value) def QueryPredicateObject(self, predicate, object): predicate = utils.SmartStr(predicate) for subject, data in list(self.store.items()): for pred, value in list(data.items()): if pred == predicate: if type(value) != type([]): value = [value] if object in value: yield rdfvalue.URN().UnSerializeFromString(subject) def QuerySubjectPredicate(self, subject, predicate): subject = utils.SmartStr(subject) predicate = utils.SmartStr(predicate) for s, data in six.iteritems(self.store): if s == subject: for pred, value in six.iteritems(data): if pred == predicate: if type(value) != type([]): value = [value] for o in value: yield o def SelectSubjectsByPrefix(self, prefix): # Keys are bytes. prefix = utils.SmartStr(prefix) for subject in self.store: if subject.startswith(prefix): yield rdfvalue.URN().UnSerializeFromString(subject) def QueryPredicatesBySubject(self, subject): subject = utils.SmartStr(subject) for pred, value in list(self.store.get(subject, {}).items()): yield (rdfvalue.URN().UnSerializeFromString(pred), value) pyaff4-0.26.post6/pyaff4/hashes.py0000664000175000017500000000362113211617552017223 0ustar rhertzogrhertzogfrom __future__ import unicode_literals # Copyright 2016,2017 Schatz Forensic Pty Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from pyaff4 import lexicon from pyaff4.rdfvalue import * import hashlib import pyblake2 def new(datatype): return map[datatype]() def newImmutableHash(value, datatype): if datatype == lexicon.HASH_SHA1: h = SHA1Hash() elif datatype == lexicon.HASH_MD5: h = MD5Hash() elif datatype == lexicon.HASH_SHA512: h = SHA512Hash() elif datatype == lexicon.HASH_SHA256: h = SHA256Hash() elif datatype == lexicon.HASH_BLAKE2B: h = Blake2bHash() elif datatype == lexicon.HASH_BLOCKMAPHASH_SHA512: h = SHA512BlockMapHash() else: raise Exception h.Set(value) return h def toShortAlgoName(datatype): return map[datatype]().name def fromShortName(name): return nameMap[name] def length(datatype): return map[datatype]().digest_size map = { lexicon.HASH_SHA1: hashlib.sha1, lexicon.HASH_SHA256: hashlib.sha256, lexicon.HASH_SHA512: hashlib.sha512, lexicon.HASH_MD5: hashlib.md5, lexicon.HASH_BLAKE2B: pyblake2.blake2b } nameMap = { "md5" : lexicon.HASH_MD5, "sha1" : lexicon.HASH_SHA1, "sha256" : lexicon.HASH_SHA256, "sha512" : lexicon.HASH_SHA512, "blake2b" : lexicon.HASH_BLAKE2B, "blockMapHashSHA512" : lexicon.HASH_BLOCKMAPHASH_SHA512 }pyaff4-0.26.post6/pyaff4/rdfvalue_test.py0000664000175000017500000000361313211617552020620 0ustar rhertzogrhertzogfrom __future__ import unicode_literals from pyaff4 import rdfvalue import unittest class URNTest(unittest.TestCase): def testXSDInt(self): i1 = rdfvalue.XSDInteger("100") self.assertLess(99, i1) self.assertEqual(100, i1) self.assertGreater(101, 100) self.assertGreater(101, i1) self.assertTrue(99 < i1) self.assertTrue(101 > i1) self.assertTrue(100 == i1) def testURN(self): url = "http://www.google.com/path/to/element#hash_data" self.assertEquals(rdfvalue.URN(url), url) self.assertEquals(rdfvalue.URN("//etc/passwd"), "file://etc/passwd") def testTrailingSlashURN(self): url = "http://code.google.com/p/snappy/" test = rdfvalue.URN(url) self.assertEquals(test.SerializeToString(), b"http://code.google.com/p/snappy/") def testAppend(self): test = rdfvalue.URN("http://www.google.com") self.assertEquals(test.Append("foobar").SerializeToString(), b"http://www.google.com/foobar") self.assertEquals(test.Append("/foobar").SerializeToString(), b"http://www.google.com/foobar") self.assertEquals(test.Append("..").SerializeToString(), b"http://www.google.com/") self.assertEquals(test.Append("../../../..").SerializeToString(), b"http://www.google.com/") self.assertEquals(test.Append("aa/bb/../..").SerializeToString(), b"http://www.google.com/") self.assertEquals(test.Append("aa//../c").SerializeToString(), b"http://www.google.com/c") self.assertEquals( test.Append("aa///////////.///./c").SerializeToString(), b"http://www.google.com/aa/c") if __name__ == '__main__': unittest.main() pyaff4-0.26.post6/pyaff4/symbolic_streams.py0000664000175000017500000000555713211617552021341 0ustar rhertzogrhertzogfrom __future__ import division from __future__ import absolute_import from __future__ import unicode_literals # Copyright 2016,2017 Schatz Forensic Pty Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from builtins import str from past.utils import old_div from pyaff4 import aff4 from pyaff4 import utils import sys import binascii import math class RepeatedStream(aff4.AFF4Stream): def __init__(self, resolver=None, urn=None, symbol=b"\x00"): super(RepeatedStream, self).__init__( resolver=resolver, urn=urn) self.symbol = symbol def Read(self, length): return self.symbol * length def Write(self, data): raise NotImplementedError() def WriteStream(self, source): raise NotImplementedError() def Tell(self): return self.readptr def Size(self): return sys.maxsize def read(self, length=1024*1024): return self.Read(length) def seek(self, offset, whence=0): self.Seek(offset, whence=whence) def write(self, data): self.Write(data) def tell(self): return self.Tell() def flush(self): self.Flush() def Prepare(self): self.Seek(0) class RepeatedStringStream(aff4.AFF4Stream): def __init__(self, resolver=None, urn=None, repeated_string=None): super(RepeatedStringStream, self).__init__( resolver=resolver, urn=urn) self.tile = repeated_string self.tilesize = len(self.tile) def Read(self, length): toRead = length res = b"" while toRead > 0: offsetInTile = self.readptr % self.tilesize chunk = self.tile[offsetInTile : offsetInTile + toRead] res += chunk toRead -= len(chunk) self.readptr += len(chunk) return res def Write(self, data): raise NotImplementedError() def WriteStream(self, source): raise NotImplementedError() def Tell(self): return self.readptr def Size(self): return sys.maxsize def read(self, length=1024*1024): return self.Read(length) def seek(self, offset, whence=0): self.Seek(offset, whence=whence) def write(self, data): self.Write(data) def tell(self): return self.Tell() def flush(self): self.Flush() def Prepare(self): self.Seek(0) pyaff4-0.26.post6/pyaff4/lexicon.py0000664000175000017500000002063713211617552017417 0ustar rhertzogrhertzog# Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. """The AFF4 lexicon.""" from __future__ import unicode_literals # This is the version of the AFF4 specification we support - not the library # version itself. from builtins import object import rdflib AFF4_VERSION = "0.2" AFF4_MAX_READ_LEN = 1024*1024*100 AFF4_NAMESPACE = "http://aff4.org/Schema#" AFF4_LEGACY_NAMESPACE = "http://afflib.org/2009/aff4#" XSD_NAMESPACE = "http://www.w3.org/2001/XMLSchema#" RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" AFF4_MEMORY_NAMESPACE = "http://aff4.org/Schema#memory/" AFF4_DISK_NAMESPACE = "http://aff4.org/Schema#disk/" # Attributes in this namespace will never be written to persistant # storage. They are simply used as a way for storing metadata about an AFF4 # object internally. AFF4_VOLATILE_NAMESPACE = "http://aff4.org/VolatileSchema#" # The configuration space of the library itself. All these should be volatile # and therefore not persistant or interoperable with other AFF4 implementations. AFF4_CONFIG_NAMESPACE = AFF4_NAMESPACE + "config" # Location of the cache (contains AFF4_FILE_NAME) AFF4_CONFIG_CACHE_DIR = AFF4_CONFIG_NAMESPACE + "/cache" # Commonly used RDF types. URNType = "URN" XSDStringType = (XSD_NAMESPACE + "string") RDFBytesType = (XSD_NAMESPACE + "hexBinary") XSDIntegerType = (XSD_NAMESPACE + "integer") XSDIntegerTypeInt = (XSD_NAMESPACE + "int") XSDIntegerTypeLong = (XSD_NAMESPACE + "long") XSDBooleanType = (XSD_NAMESPACE + "boolean") # Attribute names for different AFF4 objects. # Base AFF4Object AFF4_TYPE = (RDF_NAMESPACE + "type") AFF4_STORED = (AFF4_NAMESPACE + "stored") AFF4_CONTAINS = (AFF4_NAMESPACE + "contains") # Each container should have this file which contains the URN of the container. AFF4_CONTAINER_DESCRIPTION = "container.description" AFF4_CONTAINER_INFO_TURTLE = "information.turtle" AFF4_CONTAINER_INFO_YAML = "information.yaml" # AFF4 ZipFile containers. AFF4_ZIP_TYPE = (AFF4_NAMESPACE + "zip_volume") # AFF4Stream AFF4_STREAM_SIZE = (AFF4_NAMESPACE + "size") AFF4_LEGACY_STREAM_SIZE = (AFF4_LEGACY_NAMESPACE + "size") # The original filename the stream had. AFF4_STREAM_ORIGINAL_FILENAME = (AFF4_NAMESPACE + "original_filename") # Can be "read", "truncate", "append" AFF4_STREAM_WRITE_MODE = (AFF4_VOLATILE_NAMESPACE + "writable") # FileBackedObjects are either marked explicitly or using the file:// scheme. AFF4_FILE_TYPE = (AFF4_NAMESPACE + "file") # file:// based URNs do not always have a direct mapping to filesystem # paths. This volatile attribute is used to control the filename mapping. AFF4_FILE_NAME = (AFF4_VOLATILE_NAMESPACE + "filename") # The original filename the stream had. AFF4_STREAM_ORIGINAL_FILENAME = (AFF4_NAMESPACE + "original_filename") # ZipFileSegment AFF4_ZIP_SEGMENT_TYPE = (AFF4_NAMESPACE + "zip_segment") # AFF4 Image Stream - stores a stream using Bevies. AFF4_IMAGE_TYPE = (AFF4_NAMESPACE + "ImageStream") AFF4_LEGACY_IMAGE_TYPE = (AFF4_LEGACY_NAMESPACE + "stream") AFF4_SCUDETTE_IMAGE_TYPE = (AFF4_NAMESPACE + "image") AFF4_IMAGE_CHUNK_SIZE = (AFF4_NAMESPACE + "chunkSize") AFF4_LEGACY_IMAGE_CHUNK_SIZE = (AFF4_LEGACY_NAMESPACE + "chunkSize") AFF4_IMAGE_CHUNKS_PER_SEGMENT = (AFF4_NAMESPACE + "chunksInSegment") AFF4_LEGACY_IMAGE_CHUNKS_PER_SEGMENT = (AFF4_LEGACY_NAMESPACE + "chunksInSegment") AFF4_IMAGE_COMPRESSION = (AFF4_NAMESPACE + "compressionMethod") AFF4_LEGACY_IMAGE_COMPRESSION = (AFF4_LEGACY_NAMESPACE + "CompressionMethod") AFF4_IMAGE_COMPRESSION_ZLIB = "https://www.ietf.org/rfc/rfc1950.txt" AFF4_IMAGE_COMPRESSION_SNAPPY = "http://code.google.com/p/snappy/" AFF4_IMAGE_COMPRESSION_SNAPPY_SCUDETTE = "https://github.com/google/snappy" AFF4_IMAGE_COMPRESSION_STORED = (AFF4_NAMESPACE + "compression/stored") # AFF4Map - stores a mapping from one stream to another. AFF4_MAP_TYPE = (AFF4_NAMESPACE + "Map") AFF4_LEGACY_MAP_TYPE = (AFF4_LEGACY_NAMESPACE + "map") AFF4_SCUDETTE_MAP_TYPE = (AFF4_NAMESPACE + "map") # Categories describe the general type of an image. AFF4_CATEGORY = (AFF4_NAMESPACE + "category") # These represent standard attributes to describe memory forensics images. AFF4_MEMORY_PHYSICAL = (AFF4_MEMORY_NAMESPACE + "physical") AFF4_MEMORY_VIRTUAL = (AFF4_MEMORY_NAMESPACE + "virtual") AFF4_MEMORY_PAGEFILE = (AFF4_MEMORY_NAMESPACE + "pagefile") AFF4_MEMORY_PAGEFILE_NUM = (AFF4_MEMORY_NAMESPACE + "pagefile_number") AFF4_DISK_RAW = (AFF4_DISK_NAMESPACE + "raw") AFF4_DISK_PARTITION = (AFF4_DISK_NAMESPACE + "partition") AFF4_DIRECTORY_TYPE = (AFF4_NAMESPACE + "directory") #The constant stream is a psuedo stream which just returns a constant. AFF4_CONSTANT_TYPE = (AFF4_NAMESPACE + "constant") # The constant to repeat (default 0). AFF4_CONSTANT_CHAR = (AFF4_NAMESPACE + "constant_char") # An AFF4 Directory stores all members as files on the filesystem. Some # filesystems can not represent the URNs properly, hence we need a mapping # between the URN and the filename. This attribute stores the _relative_ path # of the filename for the member URN relative to the container's path. AFF4_DIRECTORY_CHILD_FILENAME = (AFF4_NAMESPACE + "directory/filename") HASH_SHA512 = rdflib.URIRef("http://aff4.org/Schema#SHA512") HASH_SHA256 = rdflib.URIRef("http://aff4.org/Schema#SHA256") HASH_SHA1 = rdflib.URIRef("http://aff4.org/Schema#SHA1") HASH_MD5 = rdflib.URIRef("http://aff4.org/Schema#MD5") HASH_BLAKE2B = rdflib.URIRef("http://aff4.org/Schema#Blake2b") HASH_BLOCKMAPHASH_SHA512 = rdflib.URIRef("http://aff4.org/Schema#blockMapHashSHA512") class Lexicon(object): def __init__(self): pass class StdLexicon(Lexicon): base = AFF4_NAMESPACE map = base + "Map" Image = base + "Image" stored = base + "stored" target = base + "target" contains = base + "contains" dataStream = base + "dataStream" blockMapHash = base + "blockMapHash" dependentStream = base + "dependentStream" mapPointHash = base + "mapPointHash" mapIdxHash = base + "mapIdxHash" mapPathHash = base + "mapPathHash" blockHashesHash = base + "blockHashesHash" mapHash = base + "mapHash" hash = base + "hash" chunksPerSegment = base + "chunksInSegment" chunkSize = base + "chunkSize" streamSize = base + "size" compressionMethod = base + "compressionMethod" memoryPageTableEntryOffset = base + "memoryPageTableEntryOffset" ntKernelBase = base + "NTKernelBase" OSXKernelPhysicalOffset = base + "OSXKernelPhysicalOffset" OSXKALSRSlide = base + "OSXKALSRSlide" OSXDTBPhysicalOffset = base + "OSXDTBPhysicalOffset" class LegacyLexicon(Lexicon): base = AFF4_LEGACY_NAMESPACE map = base + "map" stored = base + "stored" Image = base + "Image" blockHashesHash = base + "blockHashesHash" mapPointHash = base + "mapPointHash" mapIdxHash = base + "mapIdxHash" mapPathHash = base + "mapPathHash" mapHash = base + "mapHash" hash = base + "hash" chunksPerSegment = base + "chunksInSegment" chunkSize = base + "chunkSize" streamSize = base + "size" compressionMethod = base + "CompressionMethod" class ScudetteLexicon(Lexicon): base = AFF4_NAMESPACE map = base + "map" stored = base + "stored" Image = base + "Image" blockHashesHash = base + "blockHashesHash" mapPointHash = base + "mapPointHash" mapIdxHash = base + "mapIdxHash" mapPathHash = base + "mapPathHash" mapHash = base + "mapHash" hash = base + "hash" chunksPerSegment = base + "chunks_per_segment" chunkSize = base + "chunk_size" streamSize = base + "size" compressionMethod = base + "compression" category = base + "category" memoryPhysical = "http://aff4.org/Schema#memory/physical" legacy = LegacyLexicon() standard = StdLexicon() scudette = ScudetteLexicon() def AutoResolveAttribute(resolver, urn, attribute): """Iterate over all lexicons to autodetect the attribute.""" for lexicon in (standard, scudette, legacy): result = resolver.Get(urn, getattr(lexicon, attribute)) if result is not None: return result pyaff4-0.26.post6/pyaff4/test_memory.py0000664000175000017500000000557213211617552020326 0ustar rhertzogrhertzogfrom __future__ import print_function from __future__ import unicode_literals # Copyright 2016,2017 Schatz Forensic Pty Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from builtins import str import unittest from pyaff4 import data_store from pyaff4 import lexicon from pyaff4 import plugins from pyaff4 import rdfvalue from pyaff4 import zip from pyaff4 import hashes from pyaff4.block_hasher import * from pyaff4.linear_hasher import * from volatility.conf import ConfObject from volatility.plugins.addrspaces import aff4 class ValidatorTest(unittest.TestCase): referenceImagesPath = "/Users/bradley/Desktop/LowrieSR/" referenceImagesPath2 = "/Users/bradley/Desktop/Images/" memoryImage= referenceImagesPath + "SRLowrie.MacBook.New.pmem.af4" memoryImage2 = referenceImagesPath2 + "MaverickPMem_PhysicalMemory.aff4" memoryImage3 = referenceImagesPath2 + "win10.aff4" def testLinearHashPreStdLinearImage(self): lex = Container.identify(self.memoryImage) resolver = data_store.MemoryDataStore(lex) validator = LinearHasher() hash = validator.hash(self.memoryImage, "aff4://ad0c6ce0-1e1c-4e8f-8114-7f876237138f/dev/pmem", lexicon.HASH_SHA1) print(dir(hash)) print(hash.value) self.assertEqual(hash.value, "5d5f183ae7355b8dc8938b67aab77c0215c29ab4") def testLinearHashPreStdLinearImage2(self): lex = Container.identify(self.memoryImage2) resolver = data_store.MemoryDataStore(lex) validator = LinearHasher() hash = validator.hash(self.memoryImage2, "aff4://a862d4b0-ff3d-4ccf-a1e9-5316a4f7b8fe", lexicon.HASH_SHA1) print(dir(hash)) print(hash.value) self.assertEqual(hash.value, "8667a82bafa7b4b3513838aac31bbd20498afe3f") def testOpenMemoryImage3(self): cont = Container.open(self.memoryImage3) for run in cont.GetRanges(): print(str(run)) address = 0x2b708ac baseBlock = 0x2900000 offset = address - baseBlock tenM = 10 * 1024 * 1024 cont.seek(baseBlock) data = cont.read(tenM) self.assertTrue(len(data) == tenM) idle = data[offset:offset+4] self.assertEqual(idle, "Idle") def testOpenPMEMMacImage(self): cont = Container.open(self.memoryImage) def testOpenRekallWin10Image(self): cont = Container.open(self.memoryImage3) pyaff4-0.26.post6/pyaff4/aff4.py0000664000175000017500000002070113211617552016566 0ustar rhertzogrhertzog# Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. """This is the python AFF4 library.""" from __future__ import division from __future__ import unicode_literals from builtins import next from builtins import str from past.utils import old_div from builtins import object import platform import sys import time import uuid import weakref from pyaff4 import rdfvalue from pyaff4 import lexicon class NoneObject(object): """ A magical object which is like None but swallows bad dereferences, __getattr__, iterators etc to return itself. Instantiate with the reason for the error. """ def __init__(self, reason="", *args, **_): # Often None objects are instantiated on purpose so its not really that # important to see their reason. self.reason = reason self.args = args def __str__(self): return str(self).encode('utf-8') def __unicode__(self): return self.FormatReason() def FormatReason(self): if "%" in self.reason: return self.reason % self.args else: return self.reason.format(*self.args) def __repr__(self): return "<%s>" % self.FormatReason() ## Behave like an empty set def __iter__(self): return iter([]) def __len__(self): return 0 def __getattr__(self, attr): # By returning self for any unknown attribute and ensuring the self is # callable, we cover both properties and methods Override NotImplemented # functions in object with self return self def __bool__(self): return False def __bool__(self): return False # Comparisons. def __eq__(self, other): return other is None def __ne__(self, other): return other is not None def __gt__(self, _): return False __lt__ = __gt__ __le__ = __gt__ __ge__ = __gt__ ## Make us subscriptable obj[j] def __getitem__(self, item): return self def __call__(self, *arg, **kwargs): return self def __int__(self): return -1 __add__ = __call__ __sub__ = __call__ __mul__ = __call__ __floordiv__ = __call__ __mod__ = __call__ __div__ = __call__ __divmod__ = __call__ __pow__ = __call__ __lshift__ = __call__ __rshift__ = __call__ __and__ = __call__ __xor__ = __call__ __or__ = __call__ __radd__ = __call__ __rsub__ = __call__ __rmul__ = __call__ __rfloordiv__ = __call__ __rmod__ = __call__ __rdivmod__ = __call__ __rpow__ = __call__ __rlshift__ = __call__ __rrshift__ = __call__ __rand__ = __call__ __rxor__ = __call__ __ror__ = __call__ # Override these methods too. dereference_as = __call__ __getitem__ = __call__ def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): # Do not allow exceptions to be propagated through us. return True # Keep track of all the AFF4 objects which are alive right now. This helps in # debugging memory leaks. AFF4_OBJECT_REFS = {} class AFF4StreamProperties(object): seekable = True sizeable = True writable = False class AFF4VolumeProperties(object): supports_compression = True writable = False files_are_directories = True class AFF4Object(object): def __init__(self, resolver, urn=None): self.resolver = resolver self._dirty = False if urn is None: urn = "aff4://%s" % uuid.uuid4() self.urn = rdfvalue.URN(urn) AFF4_OBJECT_REFS[id(self)] = weakref.proxy( self, lambda _, id=id(self), ref=AFF4_OBJECT_REFS: ref.pop(id)) def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): # Return ourselves to the resolver cache. self.resolver.Return(self) def LoadFromURN(self): raise NotImplementedError def Prepare(self): pass def Flush(self): self._dirty = False def IsDirty(self): return self._dirty def MarkDirty(self): self._dirty = True class AFF4Volume(AFF4Object): def __init__(self, *args, **kwargs): super(AFF4Volume, self).__init__(*args, **kwargs) self.properties = AFF4VolumeProperties() def CreateMember(self, child): raise NotImplementedError class Image(AFF4Object): def __init__(self, *args, **kwargs): super(Image, self).__init__(*args, **kwargs) def getDTB(self): # UGLY: At the moment, the CR3 returned by MacPMEM doesnt seem to work, so we need to rely on scanning # that being the case, we dont return the dtb if it is OSX kaslrSlide = list(self.resolver.QuerySubjectPredicate(self.urn, lexicon.standard.OSXKALSRSlide)) if len(kaslrSlide) == 1 and int(kaslrSlide[0]) != 0: # it is Mac OS return 0 else: try: dtb = next(self.resolver.QuerySubjectPredicate(self.urn, lexicon.standard.memoryPageTableEntryOffset)) return int(dtb) except: # some early images generated by Rekall don't contain a CR3 return 0 SEEK_SET = 0 SEEK_CUR = 1 SEEK_END = 2 class AFF4Stream(AFF4Object): readptr = 0 size = 0 def __init__(self, *args, **kwargs): super(AFF4Stream, self).__init__(*args, **kwargs) self.properties = AFF4StreamProperties() def Read(self, length): raise NotImplementedError() def Write(self, data): raise NotImplementedError() def WriteStream(self, source): """Writes into this stream from a stream. The stream is a file-like object with read and tell() methods. """ raise NotImplementedError() def Seek(self, offset, whence=0): if whence == SEEK_SET: self.readptr = offset elif whence == SEEK_CUR: self.readptr += offset elif whence == SEEK_END: self.readptr = offset + self.Size() if self.readptr < 0: self.readptr = 0 def Tell(self): return self.readptr def Size(self): return self.size def read(self, length=1024*1024): return self.Read(length) def seek(self, offset, whence=0): self.Seek(offset, whence=whence) def write(self, data): self.Write(data) def tell(self): return self.Tell() def flush(self): self.Flush() def Prepare(self): self.Seek(0) class ProgressContext(object): last_time = 0 last_offset = 0 # The following are set in advance by users in order to get accurate # progress reports. # Start offset of this current range. start = 0 length = 0 def __init__(self, length=0): self.length = length self.last_time = self.now() def now(self): return time.time() * 1e6 def Report(self, readptr): """This will be called periodically to report the progress. Note that readptr is specified relative to the start of the range operation (WriteStream and CopyToStream) """ readptr = readptr + self.start now = self.now() if now > self.last_time + old_div(1000000,4): # Rate in MB/s. rate = ((readptr - self.last_offset) / (now - self.last_time) * 1000000 / 1024/1024) sys.stdout.write(" Reading %sMiB / %sMiB %s MiB/s\r\n" % ( readptr/1024/1024, self.length/1024/1024, rate)) sys.stdout.flush() self.last_time = now self.last_offset = readptr if aff4_abort_signaled: sys.stdout.write("\n\nAborted!\n") raise RuntimeError("Aborted") aff4_abort_signaled = False class EmptyProgressContext(ProgressContext): def Report(self, _): pass DEFAULT_PROGRESS = ProgressContext() EMPTY_PROGRESS = EmptyProgressContext() WIN32 = platform.system() == "Windows" pyaff4-0.26.post6/pyaff4/aff4_directory.py0000664000175000017500000001576713211617552020672 0ustar rhertzogrhertzog# Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. """This module implements the Directory AFF4 Volume.""" from __future__ import unicode_literals import logging import os from pyaff4 import aff4 from pyaff4 import aff4_utils from pyaff4 import lexicon from pyaff4 import rdfvalue from pyaff4 import registry from pyaff4 import utils LOGGER = logging.getLogger("pyaff4") class AFF4Directory(aff4.AFF4Volume): root_path = "" @classmethod def NewAFF4Directory(cls, resolver, root_urn): result = AFF4Directory(resolver) result.root_path = root_urn.ToFilename() mode = resolver.Get(root_urn, lexicon.AFF4_STREAM_WRITE_MODE) if mode == "truncate": aff4_utils.RemoveDirectory(result.root_path) if not (os.path.isdir(result.root_path) or os.path.isfile(result.root_path)): if mode == "truncate" or mode == "append": aff4_utils.MkDir(result.root_path) else: raise RuntimeError("Unknown mode") resolver.Set(result.urn, lexicon.AFF4_TYPE, rdfvalue.URN(lexicon.AFF4_DIRECTORY_TYPE)) resolver.Set(result.urn, lexicon.AFF4_STORED, rdfvalue.URN(root_urn)) result.LoadFromURN() return resolver.CachePut(result) def __init__(self, *args, **kwargs): super(AFF4Directory, self).__init__(*args, **kwargs) self.children = set() def CreateMember(self, child_urn): # Check that child is a relative path in our URN. relative_path = self.urn.RelativePath(child_urn) if relative_path == child_urn.SerializeToString(): raise IOError("Child URN is not within container URN.") # Use this filename. Note that since filesystems can not typically # represent files and directories as the same path component we can not # allow slashes in the filename. Otherwise we will fail to create # e.g. stream/0000000 and stream/0000000/index. filename = aff4_utils.member_name_for_urn( child_urn, self.urn, slash_ok=False) # We are allowed to create any files inside the directory volume. self.resolver.Set(child_urn, lexicon.AFF4_TYPE, rdfvalue.URN(lexicon.AFF4_FILE_TYPE)) self.resolver.Set(child_urn, lexicon.AFF4_STREAM_WRITE_MODE, rdfvalue.XSDString("truncate")) self.resolver.Set(child_urn, lexicon.AFF4_DIRECTORY_CHILD_FILENAME, rdfvalue.XSDString(filename)) # Store the member inside our storage location. self.resolver.Set( child_urn, lexicon.AFF4_FILE_NAME, rdfvalue.XSDString(self.root_path + os.sep + filename)) result = self.resolver.AFF4FactoryOpen(child_urn) self.MarkDirty() self.children.add(child_urn) return result def LoadFromURN(self): self.storage = self.resolver.Get(self.urn, lexicon.AFF4_STORED) if not self.storage: LOGGER.error("Unable to find storage for AFF4Directory %s", self.urn) raise IOError("NOT_FOUND") # The actual filename for the root directory. self.root_path = self.storage.ToFilename() try: # We need to get the URN of the container before we can process # anything. with self.resolver.AFF4FactoryOpen( self.storage.Append( lexicon.AFF4_CONTAINER_DESCRIPTION)) as desc: if desc: urn_string = utils.SmartUnicode(desc.Read(1000)) if (urn_string and self.urn.SerializeToString() != urn_string): self.resolver.DeleteSubject(self.urn) self.urn.Set(urn_string) # Set these triples with the new URN so we know how to open # it. self.resolver.Set(self.urn, lexicon.AFF4_TYPE, rdfvalue.URN(lexicon.AFF4_DIRECTORY_TYPE)) self.resolver.Set(self.urn, lexicon.AFF4_STORED, rdfvalue.URN(self.storage)) LOGGER.info("AFF4Directory volume found: %s", self.urn) # Try to load the RDF metadata file from the storage. with self.resolver.AFF4FactoryOpen( self.storage.Append( lexicon.AFF4_CONTAINER_INFO_TURTLE)) as turtle_stream: if turtle_stream: self.resolver.LoadFromTurtle(turtle_stream) # Find all the contained objects and adjust their filenames. for subject in self.resolver.SelectSubjectsByPrefix( utils.SmartUnicode(self.urn)): child_filename = self.resolver.Get( subject, lexicon.AFF4_DIRECTORY_CHILD_FILENAME) if child_filename: self.resolver.Set( subject, lexicon.AFF4_FILE_NAME, rdfvalue.XSDString("%s%s%s" % ( self.root_path, os.sep, child_filename))) except IOError: pass def Flush(self): if self.IsDirty(): # Flush all children before us. This ensures that metadata is fully # generated for each child. for child_urn in list(self.children): obj = self.resolver.CacheGet(child_urn) if obj: obj.Flush() # Mark the container with its URN with self.CreateMember( self.urn.Append( lexicon.AFF4_CONTAINER_DESCRIPTION)) as desc: desc.Truncate() desc.Write(self.urn.SerializeToString()) desc.Flush() # Flush explicitly since we already flushed above. # Dump the resolver into the zip file. with self.CreateMember( self.urn.Append( lexicon.AFF4_CONTAINER_INFO_TURTLE)) as turtle_stream: # Overwrite the old turtle file with the newer data. turtle_stream.Truncate() self.resolver.DumpToTurtle(turtle_stream, verbose=False) turtle_stream.Flush() return super(AFF4Directory, self).Flush() registry.AFF4_TYPE_MAP[lexicon.AFF4_DIRECTORY_TYPE] = AFF4Directory pyaff4-0.26.post6/pyaff4/standards_test.py0000664000175000017500000000711113211617552020770 0ustar rhertzogrhertzogfrom __future__ import print_function from __future__ import unicode_literals # Copyright 2016,2017 Schatz Forensic Pty Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from future import standard_library standard_library.install_aliases() import logging import os import io import unittest from pyaff4 import data_store from pyaff4 import lexicon from pyaff4 import plugins from pyaff4 import rdfvalue from pyaff4 import zip from pyaff4 import hashes LOGGER = logging.getLogger("pyaff4") referenceImagesPath = os.path.join(os.path.dirname(__file__), "..", "test_images") stdLinear = os.path.join(referenceImagesPath, "AFF4Std", "Base-Linear.aff4") def conditional_on_images(f): if not os.access(stdLinear, os.R_OK): LOGGER.info("Test images not cloned into repository. Tests disabled." "To enable type `git submodules init`") def _decorator(): print (f.__name__ + ' has been disabled') return _decorator return f class StandardsTest(unittest.TestCase): stdLinearURN = rdfvalue.URN.FromFileName(stdLinear) @conditional_on_images def testLocateImage(self): resolver = data_store.MemoryDataStore() with zip.ZipFile.NewZipFile(resolver, self.stdLinearURN) as zip_file: for subject in resolver.QueryPredicateObject( "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://aff4.org/Schema#DiskImage"): self.assertEquals( subject, "aff4://cf853d0b-5589-4c7c-8358-2ca1572b87eb") for subject in resolver.QueryPredicateObject( "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://aff4.org/Schema#Image"): self.assertEquals( subject, "aff4://cf853d0b-5589-4c7c-8358-2ca1572b87eb") for subject in resolver.QueryPredicateObject( "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://aff4.org/Schema#ContiguousImage"): self.assertEquals( subject, "aff4://cf853d0b-5589-4c7c-8358-2ca1572b87eb") @conditional_on_images def testReadMap(self): resolver = data_store.MemoryDataStore() with zip.ZipFile.NewZipFile(resolver, self.stdLinearURN) as zip_file: imageStream = resolver.AFF4FactoryOpen( "aff4://c215ba20-5648-4209-a793-1f918c723610") imageStream.Seek(0x163) res = imageStream.Read(17) self.assertEquals(res, b"Invalid partition") @conditional_on_images def testReadImageStream(self): resolver = data_store.MemoryDataStore() with zip.ZipFile.NewZipFile(resolver, self.stdLinearURN) as zip_file: mapStream = resolver.AFF4FactoryOpen( "aff4://c215ba20-5648-4209-a793-1f918c723610") mapStream.Seek(0x163) res = mapStream.Read(17) self.assertEquals(res, b"Invalid partition") if __name__ == '__main__': unittest.main() pyaff4-0.26.post6/pyaff4/stream_test.py0000664000175000017500000000500313211617552020276 0ustar rhertzogrhertzogfrom __future__ import unicode_literals # Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. import os import unittest from pyaff4 import data_store from pyaff4 import lexicon from pyaff4 import plugins from pyaff4 import rdfvalue class StreamTest(unittest.TestCase): def streamTest(self, stream): self.assertEquals(0, stream.Tell()) self.assertEquals(0, stream.Size()) stream.Write(b"hello world") self.assertEquals(11, stream.Tell()) stream.Seek(0, 0) self.assertEquals(0, stream.Tell()) self.assertEquals(b"hello world", stream.Read(1000)) self.assertEquals(11, stream.Tell()) stream.Seek(-5, 2) self.assertEquals(6, stream.Tell()) self.assertEquals(b"world", stream.Read(1000)) stream.Seek(-5, 2) self.assertEquals(6, stream.Tell()) stream.Write(b"Cruel world") stream.Seek(0, 0) self.assertEquals(0, stream.Tell()) self.assertEquals(b"hello Cruel world", stream.Read(1000)) self.assertEquals(17, stream.Tell()) stream.Seek(0, 0) self.assertEquals(b"he", stream.Read(2)) stream.Write(b"I have %d arms and %#x legs." % (2, 1025)) self.assertEquals(31, stream.Tell()) stream.Seek(0, 0) self.assertEquals(b"heI have 2 arms and 0x401 legs.", stream.Read(1000)) def testFileBackedStream(self): filename = rdfvalue.URN.FromFileName("/tmp/test_filename.bin") resolver = data_store.MemoryDataStore() try: resolver.Set(filename, lexicon.AFF4_STREAM_WRITE_MODE, rdfvalue.XSDString("truncate")) with resolver.AFF4FactoryOpen(filename) as file_stream: self.streamTest(file_stream) finally: os.unlink(filename.Parse().path) if __name__ == '__main__': unittest.main() pyaff4-0.26.post6/pyaff4/hashing_test.py0000664000175000017500000001620313211617552020430 0ustar rhertzogrhertzogfrom __future__ import print_function from __future__ import unicode_literals # Copyright 2016,2017 Schatz Forensic Pty Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. import os import unittest from pyaff4 import data_store from pyaff4 import lexicon from pyaff4 import plugins from pyaff4 import rdfvalue from pyaff4 import zip from pyaff4 import hashes from pyaff4 import block_hasher from pyaff4 import linear_hasher referenceImagesPath = os.path.join(os.path.dirname(__file__), u"..", u"test_images") stdLinear = os.path.join(referenceImagesPath, u"AFF4Std", u"Base-Linear.aff4") preStdLinear = os.path.join(referenceImagesPath, u"AFF4PreStd/Base-Linear.af4") preStdAllocated = os.path.join(referenceImagesPath, u"AFF4PreStd", u"Base-Allocated.af4") stdLinear = os.path.join(referenceImagesPath, u"AFF4Std", u"Base-Linear.aff4") stdAllocated = os.path.join(referenceImagesPath, u"AFF4Std", u"Base-Allocated.aff4") stdLinearAllHashes = os.path.join(referenceImagesPath, u"AFF4Std", u"Base-Linear-AllHashes.aff4") stdLinearReadError = os.path.join(referenceImagesPath, u"AFF4Std", u"Base-Linear-ReadError.aff4") stripedLinearA = os.path.join(referenceImagesPath, u"AFF4Std", u"Striped", u"Base-Linear_1.aff4") stripedLinearB = os.path.join(referenceImagesPath, u"AFF4Std", u"Striped", u"Base-Linear_2.aff4") def conditional_on_images(f): if not os.access(preStdLinear, os.R_OK): LOGGER.info("Test images not cloned into repository. Tests disabled." "To enable type `git submodules init`") def _decorator(): print (f.__name__ + ' has been disabled') return _decorator return f class ValidatorTest(unittest.TestCase): preStdLinearURN = rdfvalue.URN.FromFileName(preStdLinear) preStdAllocatedURN = rdfvalue.URN.FromFileName(preStdAllocated) stdLinearURN = rdfvalue.URN.FromFileName(stdLinear) stdAllocatedURN = rdfvalue.URN.FromFileName(stdAllocated) stdLinearAllHashesURN = rdfvalue.URN.FromFileName(stdLinearAllHashes) stdLinearReadErrorURN = rdfvalue.URN.FromFileName(stdLinearReadError) stripedLinearAURN = rdfvalue.URN.FromFileName(stripedLinearA) stripedLinearBURN = rdfvalue.URN.FromFileName(stripedLinearB) @conditional_on_images def testBlockHashPreStdLinearImage(self): validator = block_hasher.Validator() validator.validateContainer(self.preStdLinearURN) @conditional_on_images def testLinearHashPreStdLinearImage(self): validator = linear_hasher.LinearHasher() hash = validator.hash( self.preStdLinearURN, u"aff4://085066db-6315-4369-a87e-bdc7bc777d45", lexicon.HASH_SHA1) print(dir(hash)) print(hash.value) self.assertEqual(hash.value, "5d5f183ae7355b8dc8938b67aab77c0215c29ab4") @conditional_on_images def testLinearHashPreStdPartialAllocatedImage(self): validator = linear_hasher.LinearHasher() hash = validator.hash( self.preStdAllocatedURN, u"aff4://48a85e17-1041-4bcc-8b2b-7fb2cd4f815b", lexicon.HASH_SHA1) print(dir(hash)) print(hash.value) self.assertEqual(hash.value, "a9f21b04a0a77613a5a34ecdd3af269464984035") @conditional_on_images def testBlockHashPreStdPartialAllocatedImage(self): validator = block_hasher.Validator() validator.validateContainer(self.preStdAllocatedURN) @conditional_on_images def testBlockHashStdLinearImage(self): validator = block_hasher.Validator() validator.validateContainer(self.stdLinearURN) @conditional_on_images def testBlockHashStdLinearReadError(self): validator = block_hasher.Validator() validator.validateContainer(self.stdLinearReadErrorURN) @conditional_on_images def testHashStdLinearImage(self): validator = linear_hasher.LinearHasher() hash = validator.hash( self.stdLinearURN, u"aff4://fcbfdce7-4488-4677-abf6-08bc931e195b", lexicon.HASH_SHA1) print(dir(hash)) print(hash.value) self.assertEqual(hash.value, "7d3d27f667f95f7ec5b9d32121622c0f4b60b48d") @conditional_on_images def testHashStdLinearReadError(self): validator = linear_hasher.LinearHasher() hash = validator.hash( self.stdLinearReadErrorURN, u"aff4://b282d5f4-333a-4f6a-b96f-0e5138bb18c8", lexicon.HASH_SHA1) print(dir(hash)) print(hash.value) self.assertEqual(hash.value, "67e245a640e2784ead30c1ff1a3f8d237b58310f") @conditional_on_images def testHashStdPartialAllocatedImage(self): validator = linear_hasher.LinearHasher() hash = validator.hash( self.stdAllocatedURN, u"aff4://e9cd53d3-b682-4f12-8045-86ba50a0239c", lexicon.HASH_SHA1) self.assertEqual(hash.value, "e8650e89b262cf0b4b73c025312488d5a6317a26") @conditional_on_images def testBlockHashStdLinearStriped(self): validator = block_hasher.Validator() validator.validateContainerMultiPart(self.stripedLinearBURN, self.stripedLinearAURN) @conditional_on_images def testHashStdLinearStriped(self): validator = linear_hasher.LinearHasher() hash = validator.hashMulti( self.stripedLinearBURN, self.stripedLinearAURN, u"aff4://2dd04819-73c8-40e3-a32b-fdddb0317eac", lexicon.HASH_SHA1) self.assertEqual(hash.value, "7d3d27f667f95f7ec5b9d32121622c0f4b60b48d") @conditional_on_images def testBlockHashStdContainerPartialAllocated(self): validator = block_hasher.Validator() validator.validateContainer(self.stdAllocatedURN) @conditional_on_images def testBlockHashPreStdLinearImage(self): validator = block_hasher.Validator() validator.validateContainer(self.preStdLinearURN) @conditional_on_images def testBlockHashStdLinearAllHashesImage(self): validator = block_hasher.Validator() validator.validateContainer(self.stdLinearAllHashesURN) @conditional_on_images def testHashStdLinearAllHashesImage(self): validator = linear_hasher.LinearHasher() hash = validator.hash( self.stdLinearAllHashesURN, u"aff4://2a497fe5-0221-4156-8b4d-176bebf7163f", lexicon.HASH_SHA1) print(dir(hash)) print(hash.value) self.assertEqual(hash.value, "7d3d27f667f95f7ec5b9d32121622c0f4b60b48d") if __name__ == '__main__': unittest.main() pyaff4-0.26.post6/pyaff4/aff4_map_test.py0000664000175000017500000002104413211617552020463 0ustar rhertzogrhertzogfrom __future__ import unicode_literals # Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. import os import unittest from pyaff4 import aff4_file from pyaff4 import aff4_map from pyaff4 import data_store from pyaff4 import lexicon from pyaff4 import rdfvalue from pyaff4 import zip class AFF4MapTest(unittest.TestCase): filename = u"/tmp/aff4_test.zip" filename_urn = rdfvalue.URN.FromFileName(filename) image_name = u"image.dd" def tearDown(self): try: os.unlink(self.filename) except (IOError, OSError): pass def setUp(self): with data_store.MemoryDataStore() as resolver: resolver.Set(self.filename_urn, lexicon.AFF4_STREAM_WRITE_MODE, rdfvalue.XSDString("truncate")) with zip.ZipFile.NewZipFile(resolver, self.filename_urn) as zip_file: self.volume_urn = zip_file.urn self.image_urn = self.volume_urn.Append(self.image_name) # Write Map image sequentially (Seek/Write method). with aff4_map.AFF4Map.NewAFF4Map( resolver, self.image_urn, self.volume_urn) as image: # Maps are written in random order. image.Seek(50) image.Write(b"XX - This is the position.") image.Seek(0) image.Write(b"00 - This is the position.") # We can "overwrite" data by writing the same range again. image.Seek(50) image.Write(b"50") # Test the Stream method. with resolver.CachePut( aff4_file.AFF4MemoryStream(resolver)) as source: # Fill it with data. source.Write(b"AAAABBBBCCCCDDDDEEEEFFFFGGGGHHHH") # Make a temporary map that defines our plan. helper_map = aff4_map.AFF4Map(resolver) helper_map.AddRange(4, 0, 4, source.urn) # 0000AAAA helper_map.AddRange(0, 12, 4, source.urn) # DDDDAAAA helper_map.AddRange(12, 16, 4, source.urn)# DDDDAAAA0000EEEE image_urn_2 = self.volume_urn.Append( self.image_name).Append("streamed") with aff4_map.AFF4Map.NewAFF4Map( resolver, image_urn_2, self.volume_urn) as image: # Now we create the real map by copying the temporary # map stream. image.WriteStream(helper_map) def testAddRange(self): resolver = data_store.MemoryDataStore() # This is required in order to load and parse metadata from this volume # into a fresh empty resolver. with zip.ZipFile.NewZipFile(resolver, self.filename_urn) as zip_file: image_urn = zip_file.urn.Append(self.image_name) with resolver.AFF4FactoryOpen(image_urn) as map: a = rdfvalue.URN("aff4://a") b = rdfvalue.URN("aff4://b") # First test - overlapping regions: map.AddRange(0, 0, 100, a) map.AddRange(10, 10, 100, a) # Should be merged into a single range. ranges = map.GetRanges() self.assertEquals(len(ranges), 1) self.assertEquals(ranges[0].length, 110) map.Clear() # Repeating regions - should not be merged but first region should # be truncated. map.AddRange(0, 0, 100, a) map.AddRange(50, 0, 100, a) ranges = map.GetRanges() self.assertEquals(len(ranges), 2) self.assertEquals(ranges[0].length, 50) # Inserted region. Should split existing region into three. map.Clear() map.AddRange(0, 0, 100, a) map.AddRange(50, 0, 10, b) ranges = map.GetRanges() self.assertEquals(len(ranges), 3) self.assertEquals(ranges[0].length, 50) self.assertEquals(ranges[0].target_id, 0) self.assertEquals(ranges[1].length, 10) self.assertEquals(ranges[1].target_id, 1) self.assertEquals(ranges[2].length, 40) self.assertEquals(ranges[2].target_id, 0) # New range overwrites all the old ranges. map.AddRange(0, 0, 100, b) ranges = map.GetRanges() self.assertEquals(len(ranges), 1) self.assertEquals(ranges[0].length, 100) self.assertEquals(ranges[0].target_id, 1) # Simulate writing contiguous regions. These should be merged into a # single region automatically. map.Clear() map.AddRange(0, 100, 10, a) map.AddRange(10, 110, 10, a) map.AddRange(20, 120, 10, a) map.AddRange(30, 130, 10, a) ranges = map.GetRanges() self.assertEquals(len(ranges), 1) self.assertEquals(ranges[0].length, 40) self.assertEquals(ranges[0].target_id, 0) # Writing sparse image. map.Clear() map.AddRange(0, 100, 10, a) map.AddRange(30, 130, 10, a) ranges = map.GetRanges() self.assertEquals(len(ranges), 2) self.assertEquals(ranges[0].length, 10) self.assertEquals(ranges[0].target_id, 0) self.assertEquals(ranges[1].length, 10) self.assertEquals(ranges[1].map_offset, 30) self.assertEquals(ranges[1].target_id, 0) # Now merge. Adding the missing region makes the image not sparse. map.AddRange(10, 110, 20, a) ranges = map.GetRanges() self.assertEquals(len(ranges), 1) self.assertEquals(ranges[0].length, 40) def testCreateMapStream(self): resolver = data_store.MemoryDataStore() # This is required in order to load and parse metadata from this volume # into a fresh empty resolver. with zip.ZipFile.NewZipFile(resolver, self.filename_urn) as zip_file: image_urn = zip_file.urn.Append(self.image_name) image_urn_2 = image_urn.Append("streamed") # Check the first stream. self.CheckImageURN(resolver, image_urn) # The second stream must be the same. self.CheckStremImageURN(resolver, image_urn_2) def CheckStremImageURN(self, resolver, image_urn_2): with resolver.AFF4FactoryOpen(image_urn_2) as map: self.assertEquals(map.Size(), 16) self.assertEquals(map.Read(100), b"DDDDAAAA\x00\x00\x00\x00EEEE") # The data stream should be packed without gaps. with resolver.AFF4FactoryOpen(image_urn_2.Append("data")) as image: self.assertEquals(image.Read(100), b"DDDDAAAAEEEE") def CheckImageURN(self, resolver, image_urn): with resolver.AFF4FactoryOpen(image_urn) as map: map.Seek(50) self.assertEquals(map.Read(2), b"50") map.Seek(0) self.assertEquals(map.Read(2), b"00") ranges = map.GetRanges() self.assertEquals(len(ranges), 3) self.assertEquals(ranges[0].length, 26) self.assertEquals(ranges[0].map_offset, 0) self.assertEquals(ranges[0].target_offset, 26) # This is the extra "overwritten" 2 bytes which were appended to the # end of the target stream and occupy the map range from 50-52. self.assertEquals(ranges[1].length, 2) self.assertEquals(ranges[1].map_offset, 50) self.assertEquals(ranges[1].target_offset, 52) self.assertEquals(ranges[2].length, 24) self.assertEquals(ranges[2].map_offset, 52) self.assertEquals(ranges[2].target_offset, 2) # Test that reads outside the ranges null pad correctly. map.Seek(48) read_string = map.Read(4) self.assertEquals(read_string, b"\x00\x0050") if __name__ == '__main__': #logging.getLogger().setLevel(logging.DEBUG) unittest.main() pyaff4-0.26.post6/pyaff4/stream_factory.py0000664000175000017500000001436613211617552021002 0ustar rhertzogrhertzogfrom __future__ import unicode_literals # Copyright 2016,2017 Schatz Forensic Pty Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from builtins import str from builtins import object from pyaff4.symbolic_streams import * from pyaff4 import rdfvalue import re class StreamFactory(object): def __init__(self, resolver, lex): self.lexicon = lex self.resolver = resolver self.symbolmatcher = re.compile("[0-9A-F]{2}") self.fixedSymbolics = [ self.lexicon.base + "Zero", self.lexicon.base + "UnknownData", self.lexicon.base + "UnreadableData", self.lexicon.base + "NoData"] # TODO: Refactor the below classes to split the subname from the NS # then do matching only on the subnname class PreStdStreamFactory(StreamFactory): def __init__(self, resolver, lex): StreamFactory.__init__(self, resolver, lex) self.fixedSymbolics.append(self.lexicon.base + "FF") def isSymbolicStream(self, urn): if type(urn) == rdfvalue.URN: urn = str(urn) if not urn.startswith("http://"): return False else: if urn in self.fixedSymbolics: return True # Pre-Std Evimetry Symbolic Streams are of the form # http://afflib.org/2009#FF if urn.startswith(self.lexicon.base) and len(urn) == len(self.lexicon.base) + 2: # now verify symbolic part shortName = urn[len(self.lexicon.base):].upper() if self.symbolmatcher.match(shortName) != None: return True if urn.startswith(self.lexicon.base + "SymbolicStream"): return True if urn.startswith("http://afflib.org/2012/SymbolicStream#"): return True return False def createSymbolic(self, urn): if type(urn) == rdfvalue.URN: urn = str(urn) if urn == self.lexicon.base + "Zero": return RepeatedStream(resolver=self.resolver, urn=urn, symbol=b"\x00") if urn == self.lexicon.base + "FF": return RepeatedStream(resolver=self.resolver, urn=urn, symbol=b"\xff") if urn == self.lexicon.base + "UnknownData": return RepeatedStringStream(resolver=self.resolver, urn=urn, repeated_string=GetUnknownString()) if (urn.startswith(self.lexicon.base + "SymbolicStream") and len(urn) == len(self.lexicon.base + "SymbolicStream") + 2): shortName = urn[len(self.lexicon.base + "SymbolicStream"):].upper() value = binascii.unhexlify(shortName) return RepeatedStream(resolver=self.resolver, urn=urn, symbol=value) if (urn.startswith("http://afflib.org/2012/SymbolicStream#") and len(urn) == len("http://afflib.org/2012/SymbolicStream#") + 2): shortName = urn[len("http://afflib.org/2012/SymbolicStream#"):].upper() value = binascii.unhexlify(shortName) return RepeatedStream(resolver=self.resolver, urn=urn, symbol=value) if urn.startswith(self.lexicon.base) and len(urn) == len(self.lexicon.base) + 2: shortName = urn[len(self.lexicon.base):].upper() value = binascii.unhexlify(shortName) return RepeatedStream(resolver=self.resolver, urn=urn, symbol=value) raise ValueError class StdStreamFactory(StreamFactory): def isSymbolicStream(self, urn): if type(urn) == rdfvalue.URN: urn = str(urn) if not urn.startswith("http://"): return False else: if urn in self.fixedSymbolics: return True if urn.startswith(self.lexicon.base + "SymbolicStream"): return True return False def createSymbolic(self, urn): if type(urn) == rdfvalue.URN: urn = str(urn) if urn == self.lexicon.base + "Zero": return RepeatedStream(resolver=self.resolver, urn=urn, symbol=b"\x00") if urn == self.lexicon.base + "UnknownData": return RepeatedStringStream( resolver=self.resolver, urn=urn, repeated_string=GetUnknownString()) if urn == self.lexicon.base + "UnreadableData": return RepeatedStringStream( resolver=self.resolver, urn=urn, repeated_string=GetUnreadableString()) if (urn.startswith(self.lexicon.base + "SymbolicStream") and len(urn) == len(self.lexicon.base + "SymbolicStream") + 2): shortName = urn[len(self.lexicon.base + "SymbolicStream"):].upper() value = binascii.unhexlify(shortName) return RepeatedStream(resolver=self.resolver, urn=urn, symbol=value) raise ValueError def _MakeTile(repeated_string): """Make exactly 1Mb tile of the repeated string.""" total_size = 1024*1024 tile = repeated_string * (total_size // len(repeated_string)) tile += repeated_string[:total_size % len(repeated_string)] return tile # Exactly 1Mb. _UNKNOWN_STRING = None def GetUnknownString(): global _UNKNOWN_STRING if _UNKNOWN_STRING is not None: return _UNKNOWN_STRING _UNKNOWN_STRING = _MakeTile(b"UNKNOWN") return _UNKNOWN_STRING # Exactly 1Mb. _UNREADABLE_STRING = None def GetUnreadableString(): global _UNREADABLE_STRING if _UNREADABLE_STRING is not None: return _UNREADABLE_STRING _UNREADABLE_STRING = _MakeTile(b"UNREADABLEDATA") return _UNREADABLE_STRING pyaff4-0.26.post6/pyaff4/aff4_image.py0000664000175000017500000004171313211617552017736 0ustar rhertzogrhertzog# Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. """This module implements the standard AFF4 Image.""" from __future__ import division from __future__ import unicode_literals from builtins import range from builtins import str from past.utils import old_div from builtins import object import binascii import logging import struct import zlib from expiringdict import ExpiringDict import snappy from pyaff4 import aff4 from pyaff4 import lexicon from pyaff4 import rdfvalue from pyaff4 import registry from pyaff4 import hashes LOGGER = logging.getLogger("pyaff4") class _CompressorStream(object): """A stream which chunks up another stream. Each read() operation will return a compressed chunk. """ def __init__(self, owner, stream): self.owner = owner self.stream = stream self.chunk_count_in_bevy = 0 self.size = 0 self.bevy_index = [] self.bevy_length = 0 def tell(self): return self.stream.tell() def read(self, _): # Stop copying when the bevy is full. if self.chunk_count_in_bevy >= self.owner.chunks_per_segment: return "" chunk = self.stream.read(self.owner.chunk_size) if not chunk: return "" self.size += len(chunk) if self.owner.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB: compressed_chunk = zlib.compress(chunk) elif (snappy and self.owner.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY): compressed_chunk = snappy.compress(chunk) elif self.owner.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED: compressed_chunk = chunk self.bevy_index.append((self.bevy_length, len(compressed_chunk))) self.bevy_length += len(compressed_chunk) self.chunk_count_in_bevy += 1 return compressed_chunk class AFF4Image(aff4.AFF4Stream): @staticmethod def NewAFF4Image(resolver, image_urn, volume_urn): with resolver.AFF4FactoryOpen(volume_urn) as volume: # Inform the volume that we have a new image stream contained within # it. volume.children.add(image_urn) resolver.Set(image_urn, lexicon.AFF4_TYPE, rdfvalue.URN( lexicon.AFF4_IMAGE_TYPE)) resolver.Set(image_urn, lexicon.AFF4_STORED, rdfvalue.URN(volume_urn)) return resolver.AFF4FactoryOpen(image_urn) def LoadFromURN(self): #volume_urn = self.resolver.Get(self.urn, lexicon.AFF4_STORED) #if not volume_urn: # raise IOError("Unable to find storage for urn %s" % self.urn) self.lexicon = self.resolver.lexicon self.chunk_size = int(lexicon.AutoResolveAttribute( self.resolver, self.urn, "chunkSize") or 32*1024) self.chunks_per_segment = int(lexicon.AutoResolveAttribute( self.resolver, self.urn, "chunksPerSegment") or 1024) self.size = int(lexicon.AutoResolveAttribute( self.resolver, self.urn, "streamSize") or 0) self.compression = (lexicon.AutoResolveAttribute( self.resolver, self.urn, "compressionMethod") or lexicon.AFF4_IMAGE_COMPRESSION_ZLIB) # A buffer for overlapped writes which do not fit into a chunk. self.buffer = b"" # Compressed chunks in the bevy. self.bevy = [] # Length of all chunks in the bevy. self.bevy_length = 0 # List of (bevy offsets, compressed chunk length). self.bevy_index = [] self.chunk_count_in_bevy = 0 self.bevy_number = 0 self.cache = ExpiringDict(max_len=1000, max_age_seconds=10) def _write_bevy_index(self, volume, bevy_urn, bevy_index, flush=False): """Write the index segment for the specified bevy_urn.""" bevy_index_urn = bevy_urn.Append("index") with volume.CreateMember(bevy_index_urn) as bevy_index_segment: # Old style index is just a list of lengths. bevy_index = [x[1] for x in bevy_index] bevy_index_segment.Write( struct.pack("<" + "I"*len(bevy_index), bevy_index)) if flush: self.resolver.Close(bevy_index_segment) def WriteStream(self, source_stream, progress=None): """Copy data from a source stream into this stream.""" if progress is None: progress = aff4.DEFAULT_PROGRESS volume_urn = self.resolver.Get(self.urn, lexicon.AFF4_STORED) if not volume_urn: raise IOError("Unable to find storage for urn %s" % self.urn) with self.resolver.AFF4FactoryOpen(volume_urn) as volume: # Write a bevy at a time. while 1: stream = _CompressorStream(self, source_stream) bevy_urn = self.urn.Append("%08d" % self.bevy_number) progress.start = (self.bevy_number * self.chunks_per_segment * self.chunk_size) with volume.CreateMember(bevy_urn) as bevy: bevy.WriteStream(stream, progress=progress) self._write_bevy_index(volume, bevy_urn, stream.bevy_index) # Make another bevy. self.bevy_number += 1 self.size += stream.size self.readptr += stream.size # Last iteration - the compressor stream quit before the bevy is # full. if stream.chunk_count_in_bevy != self.chunks_per_segment: break self._write_metadata() def Write(self, data): self.MarkDirty() self.buffer += data idx = 0 while len(self.buffer) - idx > self.chunk_size: chunk = self.buffer[idx:idx+self.chunk_size] idx += self.chunk_size self.FlushChunk(chunk) self.buffer = self.buffer[idx:] self.readptr += len(data) if self.readptr > self.size: self.size = self.readptr return len(data) def FlushChunk(self, chunk): bevy_offset = self.bevy_length if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB: compressed_chunk = zlib.compress(chunk) elif (snappy and self.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY): compressed_chunk = snappy.compress(chunk) elif self.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED: compressed_chunk = chunk self.bevy_index.append((bevy_offset, len(compressed_chunk))) self.bevy.append(compressed_chunk) self.bevy_length += len(compressed_chunk) self.chunk_count_in_bevy += 1 if self.chunk_count_in_bevy >= self.chunks_per_segment: self._FlushBevy() def _FlushBevy(self): volume_urn = self.resolver.Get(self.urn, lexicon.AFF4_STORED) if not volume_urn: raise IOError("Unable to find storage for urn %s" % self.urn) # Bevy is empty nothing to do. if not self.bevy: return bevy_urn = self.urn.Append("%08d" % self.bevy_number) with self.resolver.AFF4FactoryOpen(volume_urn) as volume: self._write_bevy_index(volume, bevy_urn, self.bevy_index, flush=True) with volume.CreateMember(bevy_urn) as bevy: bevy.Write(b"".join(self.bevy)) # We dont need to hold these in memory any more. self.resolver.Close(bevy) # In Python it is more efficient to keep a list of chunks and then join # them at the end in one operation. self.chunk_count_in_bevy = 0 self.bevy_number += 1 self.bevy = [] self.bevy_index = [] self.bevy_length = 0 def _write_metadata(self): self.resolver.Set(self.urn, lexicon.AFF4_TYPE, rdfvalue.URN(lexicon.AFF4_IMAGE_TYPE)) self.resolver.Set(self.urn, lexicon.AFF4_IMAGE_CHUNK_SIZE, rdfvalue.XSDInteger(self.chunk_size)) self.resolver.Set(self.urn, lexicon.AFF4_IMAGE_CHUNKS_PER_SEGMENT, rdfvalue.XSDInteger(self.chunks_per_segment)) self.resolver.Set(self.urn, lexicon.AFF4_STREAM_SIZE, rdfvalue.XSDInteger(self.Size())) self.resolver.Set( self.urn, lexicon.AFF4_IMAGE_COMPRESSION, rdfvalue.URN(self.compression)) def Flush(self): if self.IsDirty(): # Flush the last chunk. self.FlushChunk(self.buffer) self._FlushBevy() self._write_metadata() return super(AFF4Image, self).Flush() def Read(self, length): length = int(length) if length == 0: return "" length = min(length, self.Size() - self.readptr) initial_chunk_id, initial_chunk_offset = divmod(self.readptr, self.chunk_size) final_chunk_id, _ = divmod(self.readptr + length - 1, self.chunk_size) # We read this many full chunks at once. chunks_to_read = final_chunk_id - initial_chunk_id + 1 chunk_id = initial_chunk_id result = b"" while chunks_to_read > 0: chunks_read, data = self._ReadPartial(chunk_id, chunks_to_read) if chunks_read == 0: break chunks_to_read -= chunks_read result += data if initial_chunk_offset: result = result[initial_chunk_offset:] result = result[:length] self.readptr += len(result) return result def _parse_bevy_index(self, bevy): """Read and return the bevy's index. This version deals with pre standard versions in which the index stream consists of a list of chunk offsets: - Evimetry uses a 1 based list (so the first entry in the index is the offset of the first chunk (and the 0'th chunk is assumed to start at 0). - Scudette's version always uses 0 for the offset of the first chunk and the last chunk's length is assumed from the total bevy size. """ bevy_index_urn = bevy.urn.Append("index") with self.resolver.AFF4FactoryOpen(bevy_index_urn) as bevy_index: bevy_index_data = bevy_index.Read(bevy_index.Size()) format_string = "<" + "I" * (bevy_index.Size() // struct.calcsize("I")) chunk_offsets = struct.unpack(format_string, bevy_index_data) # Convert the index into standard form: # list of (offset, compressed length) # Evimetry's implementation if chunk_offsets[0] != 0: result = [(0, chunk_offsets[0])] else: # Scudette's implementation. result = [] for i in range(len(chunk_offsets)-1): result.append( (chunk_offsets[i], chunk_offsets[i+1] - chunk_offsets[i])) # Last chunk's size is inferred from the rest of the bevy. if chunk_offsets[-1] < bevy.Size(): result.append((chunk_offsets[-1], bevy.Size() - chunk_offsets[-1])) return result def _ReadPartial(self, chunk_id, chunks_to_read): chunks_read = 0 result = b"" while chunks_to_read > 0: r = self.cache.get(chunk_id) if r != None: result += r chunks_to_read -= 1 chunk_id += 1 chunks_read += 1 continue bevy_id = old_div(chunk_id, self.chunks_per_segment) bevy_urn = self.urn.Append("%08d" % bevy_id) with self.resolver.AFF4FactoryOpen(bevy_urn) as bevy: while chunks_to_read > 0: r = self.cache.get(chunk_id) if r != None: result += r chunks_to_read -= 1 chunk_id += 1 chunks_read += 1 continue # Read a full chunk from the bevy. data = self._ReadChunkFromBevy(chunk_id, bevy) self.cache[chunk_id] = data result += data chunks_to_read -= 1 chunk_id += 1 chunks_read += 1 # This bevy is exhausted, get the next one. if bevy_id < old_div(chunk_id, self.chunks_per_segment): break return chunks_read, result def _ReadChunkFromBevy(self, chunk_id, bevy): bevy_index = self._parse_bevy_index(bevy) chunk_id_in_bevy = chunk_id % self.chunks_per_segment if not bevy_index: LOGGER.error("Index empty in %s: %s", self.urn, chunk_id) raise IOError("Index empty in %s: %s" % (self.urn, chunk_id)) # The segment is not completely full. if chunk_id_in_bevy >= len(bevy_index): LOGGER.error("Bevy index too short in %s: %s", self.urn, chunk_id) raise IOError("Bevy index too short in %s: %s" % ( self.urn, chunk_id)) # The index is a list of (offset, compressed_length) chunk_offset, chunk_size = bevy_index[chunk_id_in_bevy] bevy.Seek(chunk_offset, 0) cbuffer = bevy.Read(chunk_size) if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB : return zlib.decompress(cbuffer) # Backwards compatibility with Scudette's AFF4 implementation. if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY_SCUDETTE: # Chunks are always compressed. return snappy.decompress(cbuffer) if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY: # Buffer is not compressed. if len(cbuffer) == self.chunk_size: return cbuffer return snappy.decompress(cbuffer) if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED: return cbuffer raise RuntimeError( "Unable to process compression %s" % self.compression) # This class implements Evimetry's AFF4 pre standardisation effort class AFF4PreSImage(AFF4Image): def _get_block_hash_urn(self, bevy_id, hash_datatype): return self.urn.Append("%08d/blockHash.%s" % ( bevy_id, hashes.toShortAlgoName(hash_datatype))) def readBlockHash(self, chunk_id, hash_datatype): bevy_id = old_div(chunk_id, self.chunks_per_segment) bevy_blockHash_urn = self._get_block_hash_urn( bevy_id, hash_datatype) blockLength = hashes.length(hash_datatype) with self.resolver.AFF4FactoryOpen( bevy_blockHash_urn) as bevy_blockHashes: idx = chunk_id * blockLength bevy_blockHashes.Seek(idx) hash_value = bevy_blockHashes.Read(blockLength) return hashes.newImmutableHash( binascii.hexlify(hash_value), hash_datatype) class AFF4SImage(AFF4PreSImage): def _get_block_hash_urn(self, bevy_id, hash_datatype): return self.urn.Append("%08d.blockHash.%s" % ( bevy_id, hashes.toShortAlgoName(hash_datatype))) def _write_bevy_index(self, volume, bevy_urn, bevy_index, flush=False): """Write the index segment for the specified bevy_urn.""" bevy_index_urn = rdfvalue.URN("%s.index" % bevy_urn) with volume.CreateMember(bevy_index_urn) as bevy_index_segment: serialized_index = b"".join((struct.pack(" %s (length %s)", self.urn, storage_urn, self.Size()) gcs = get_client() urn_parts = storage_urn.Parse() # The bucket that contains this stream. bucket = gcs.get_bucket(urn_parts.netloc) blob_name = urn_parts.path.strip("/") blob = bucket.blob(blob_name) self.fd.seek(0) blob.upload_from_file(self.fd) LOGGER.info("Flushed %s (%s) in %s Sec", self.urn, self.Size(), int(time.time() - now)) except Exception as e: LOGGER.exception("Error: %s", e) def Flush(self): # Sync the internal cache with the blob store. if self.IsDirty(): self.resolver.CloudThreadPool.apply_async(self._async_flush) else: self.fd.close() super(AFF4GCSStream, self).Flush() def GenericGCSHandler(resolver, urn): # This is a bucket urn. if not urn.Parse().path.strip("/"): directory_handler = registry.AFF4_TYPE_MAP[lexicon.AFF4_DIRECTORY_TYPE] result = directory_handler(resolver) resolver.Set(result.urn, lexicon.AFF4_STORED, urn) return result return AFF4GCSStream(resolver, urn) registry.AFF4_TYPE_MAP["gs"] = GenericGCSHandler registry.AFF4_TYPE_MAP[AFF4_GCS_TYPE] = AFF4GStore registry.AFF4_TYPE_MAP[AFF4_GCS_STREAM_TYPE] = AFF4GCSStream pyaff4-0.26.post6/pyaff4/aff4_image_test.py0000664000175000017500000001105413211617552020770 0ustar rhertzogrhertzogfrom __future__ import unicode_literals # Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from future import standard_library standard_library.install_aliases() from builtins import range import os import io import unittest from pyaff4 import aff4_image from pyaff4 import data_store from pyaff4 import lexicon from pyaff4 import rdfvalue from pyaff4 import zip from pyaff4 import plugins class AFF4ImageTest(unittest.TestCase): filename = "/tmp/aff4_test.zip" filename_urn = rdfvalue.URN.FromFileName(filename) image_name = "image.dd" def tearDown(self): try: os.unlink(self.filename) except (IOError, OSError): pass def setUp(self): with data_store.MemoryDataStore() as resolver: resolver.Set(self.filename_urn, lexicon.AFF4_STREAM_WRITE_MODE, rdfvalue.XSDString("truncate")) with zip.ZipFile.NewZipFile(resolver, self.filename_urn) as zip_file: self.volume_urn = zip_file.urn image_urn = self.volume_urn.Append(self.image_name) # Use default compression. with aff4_image.AFF4Image.NewAFF4Image( resolver, image_urn, self.volume_urn) as image: image.chunk_size = 10 image.chunks_per_segment = 3 for i in range(100): image.Write(b"Hello world %02d!" % i) self.image_urn = image.urn # Write a snappy compressed image. self.image_urn_2 = self.image_urn.Append("2") with aff4_image.AFF4Image.NewAFF4Image( resolver, self.image_urn_2, self.volume_urn) as image_2: image_2.compression = lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY image_2.Write(b"This is a test") # Use streaming API to write image. self.image_urn_3 = self.image_urn.Append("3") with aff4_image.AFF4Image.NewAFF4Image( resolver, self.image_urn_3, self.volume_urn) as image: image.chunk_size = 10 image.chunks_per_segment = 3 stream = io.BytesIO() for i in range(100): stream.write(b"Hello world %02d!" % i) stream.seek(0) image.WriteStream(stream) def testOpenImageByURN(self): resolver = data_store.MemoryDataStore() # This is required in order to load and parse metadata from this volume # into a fresh empty resolver. with zip.ZipFile.NewZipFile(resolver, self.filename_urn) as zip_file: image_urn = zip_file.urn.Append(self.image_name) with resolver.AFF4FactoryOpen(image_urn) as image: self.assertEquals(image.chunk_size, 10) self.assertEquals(image.chunks_per_segment, 3) self.assertEquals( b"Hello world 00!Hello world 01!Hello world 02!Hello world 03!" + b"Hello world 04!Hello world 05!Hello worl", image.Read(100)) self.assertEquals(1500, image.Size()) # Now test snappy decompression. with resolver.AFF4FactoryOpen(self.image_urn_2) as image_2: self.assertEquals( resolver.Get(image_2.urn, lexicon.AFF4_IMAGE_COMPRESSION), lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY) data = image_2.Read(100) self.assertEquals(data, b"This is a test") # Now test streaming API image. with resolver.AFF4FactoryOpen(self.image_urn_3) as image_3: self.assertEquals(image_3.chunk_size, 10) self.assertEquals(image_3.chunks_per_segment, 3) self.assertEquals( b"Hello world 00!Hello world 01!Hello world 02!Hello world 03!"+ b"Hello world 04!Hello world 05!Hello worl", image_3.Read(100)) if __name__ == '__main__': #logging.getLogger().setLevel(logging.DEBUG) unittest.main() pyaff4-0.26.post6/pyaff4/data_store_test.py0000664000175000017500000001126413211617552021136 0ustar rhertzogrhertzogfrom __future__ import unicode_literals # Copyright 2015 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from future import standard_library standard_library.install_aliases() from pyaff4 import aff4 from pyaff4 import data_store from pyaff4 import lexicon from pyaff4 import rdfvalue import unittest import io class DataStoreTest(unittest.TestCase): def setUp(self): self.hello_urn = rdfvalue.URN("aff4://hello") self.store = data_store.MemoryDataStore() self.store.Set( self.hello_urn, rdfvalue.URN(lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY), rdfvalue.XSDString("foo")) self.store.Set( self.hello_urn, rdfvalue.URN(lexicon.AFF4_TYPE), rdfvalue.XSDString("bar")) def testDataStore(self): result = self.store.Get(self.hello_urn, rdfvalue.URN( lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY)) self.assertEquals(type(result), rdfvalue.XSDString) self.assertEquals(result.SerializeToString(), b"foo") self.store.Set( self.hello_urn, rdfvalue.URN(lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY), rdfvalue.XSDString("bar")) # In the current implementation a second Set() overwrites the previous # value. self.assertEquals( self.store.Get(self.hello_urn, rdfvalue.URN( lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY)), rdfvalue.XSDString("bar")) def testTurtleSerialization(self): data = self.store.DumpToTurtle(verbose=True) new_store = data_store.MemoryDataStore() new_store.LoadFromTurtle(io.BytesIO(data)) res = new_store.Get(self.hello_urn, rdfvalue.URN( lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY)) self.assertEquals(res, b"foo") class AFF4ObjectCacheMock(data_store.AFF4ObjectCache): def GetKeys(self): return [entry.key for entry in self.lru_list] def GetInUse(self): return [key for key in self.in_use] class AFF4ObjectCacheTest(unittest.TestCase): def testLRU(self): cache = AFF4ObjectCacheMock(3) resolver = data_store.MemoryDataStore() obj1 = aff4.AFF4Object(resolver, "a") obj2 = aff4.AFF4Object(resolver, "b") obj3 = aff4.AFF4Object(resolver, "c") obj4 = aff4.AFF4Object(resolver, "d") cache.Put(obj1) cache.Put(obj2) cache.Put(obj3) result = cache.GetKeys() # Keys are stored as serialized urns. self.assertEquals(result[0], b"file:///c") self.assertEquals(result[1], b"file:///b") self.assertEquals(result[2], b"file:///a") # This removes the object from the cache and places it in the in_use # list. self.assertEquals(cache.Get("file:///a"), obj1) # Keys are stored as serialized urns. result = cache.GetKeys() self.assertEquals(len(result), 2) self.assertEquals(result[0], b"file:///c") self.assertEquals(result[1], b"file:///b") # Keys are stored as serialized urns. in_use = cache.GetInUse() self.assertEquals(len(in_use), 1) self.assertEquals(in_use[0], b"file:///a") # Now we return the object. It should now appear in the lru lists. cache.Return(obj1) result = cache.GetKeys() self.assertEquals(len(result), 3) self.assertEquals(result[0], b"file:///a") self.assertEquals(result[1], b"file:///c") self.assertEquals(result[2], b"file:///b") in_use = cache.GetInUse() self.assertEquals(len(in_use), 0) # Over flow the cache - this should expire the older object. cache.Put(obj4) result = cache.GetKeys() self.assertEquals(len(result), 3) self.assertEquals(result[0], b"file:///d") self.assertEquals(result[1], b"file:///a") self.assertEquals(result[2], b"file:///c") # b is now expired so not in cache. self.assertEquals(cache.Get("file:///b"), None) # Check that remove works cache.Remove(obj4) self.assertEquals(cache.Get("file:///d"), None) result = cache.GetKeys() self.assertEquals(len(result), 2) if __name__ == '__main__': unittest.main() pyaff4-0.26.post6/pyaff4/zip_test.py0000664000175000017500000000631413211617552017613 0ustar rhertzogrhertzogfrom __future__ import unicode_literals # Copyright 2015 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from future import standard_library standard_library.install_aliases() import os import io import unittest from pyaff4 import data_store from pyaff4 import lexicon from pyaff4 import plugins from pyaff4 import rdfvalue from pyaff4 import zip class ZipTest(unittest.TestCase): filename = "/tmp/aff4_test.zip" filename_urn = rdfvalue.URN.FromFileName(filename) segment_name = "Foobar.txt" streamed_segment = "streamed.txt" data1 = b"I am a segment!" data2 = b"I am another segment!" def setUp(self): with data_store.MemoryDataStore() as resolver: resolver.Set(self.filename_urn, lexicon.AFF4_STREAM_WRITE_MODE, rdfvalue.XSDString("truncate")) with zip.ZipFile.NewZipFile(resolver, self.filename_urn) as zip_file: self.volume_urn = zip_file.urn segment_urn = self.volume_urn.Append(self.segment_name) with zip_file.CreateMember(segment_urn) as segment: segment.Write(self.data1) with zip_file.CreateMember(segment_urn) as segment2: segment2.Seek(0, 2) segment2.Write(self.data2) streamed_urn = self.volume_urn.Append(self.streamed_segment) with zip_file.CreateMember(streamed_urn) as streamed: streamed.compression_method = zip.ZIP_DEFLATE src = io.BytesIO(self.data1) streamed.WriteStream(src) def tearDown(self): try: os.unlink(self.filename) except (IOError, OSError): pass def testStreamedSegment(self): resolver = data_store.MemoryDataStore() # This is required in order to load and parse metadata from this volume # into a fresh empty resolver. with zip.ZipFile.NewZipFile(resolver, self.filename_urn) as zip_file: segment_urn = zip_file.urn.Append(self.streamed_segment) with resolver.AFF4FactoryOpen(segment_urn) as segment: self.assertEquals(segment.Read(1000), self.data1) def testOpenSegmentByURN(self): resolver = data_store.MemoryDataStore() # This is required in order to load and parse metadata from this volume # into a fresh empty resolver. with zip.ZipFile.NewZipFile(resolver, self.filename_urn) as zip_file: segment_urn = zip_file.urn.Append(self.segment_name) with resolver.AFF4FactoryOpen(segment_urn) as segment: self.assertEquals(segment.Read(1000), self.data1 + self.data2) if __name__ == '__main__': unittest.main() pyaff4-0.26.post6/pyaff4/aff4_map.py0000664000175000017500000004422013211617552017425 0ustar rhertzogrhertzog# Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. """This module implements the standard AFF4 Image.""" from __future__ import print_function from __future__ import unicode_literals from builtins import str from builtins import object import collections import intervaltree import logging import struct from pyaff4 import aff4 from pyaff4 import aff4_image from pyaff4 import lexicon from pyaff4 import rdfvalue from pyaff4 import registry from pyaff4 import utils LOGGER = logging.getLogger("pyaff4") class Range(collections.namedtuple( "Range", "map_offset length target_offset target_id")): """A class to manipulate a mapping range.""" __slots__ = () format_str = "[%x:%x)@%s>" % ( self.map_offset, self.length, self.target_offset, self.length, self.target_id) def Merge(self, other): """Merge two ranges together. Raises ValueError if the ranges can not be merged. """ if (other.target_id != self.target_id or self.target_offset_at_map_offset(self.map_offset) != other.target_offset_at_map_offset(self.map_offset)): raise ValueError("Ranges not mergeable") start = min(self.map_offset, other.map_offset) end = max(self.map_end, other.map_end) result = self._replace( map_offset=start, length=end-start, target_offset=self.target_offset_at_map_offset(start)) return result def left_clip(self, offset): """Clip this range at the left side with offset.""" if not self.map_offset <= offset <= self.map_end: raise ValueError("clip offset is not inside range") adjustment = offset - self.map_offset return self._replace(map_offset=self.map_offset + adjustment, target_offset=self.target_offset + adjustment, length=self.length - adjustment) def right_clip(self, offset): """Clip this range at the right side with offset.""" if not self.map_offset <= offset <= self.map_end: raise ValueError("clip offset is not inside range") adjustment = self.map_end - offset return self._replace(length=self.length - adjustment) class _MapStreamHelper(object): def __init__(self, resolver, source, destination): self.resolver = resolver self.range_offset = 0 self.readptr = 0 self.source = source self.destination = destination self.source_ranges = sorted(source.tree) if not self.source_ranges: raise RuntimeError("Source map is empty when calling WriteStream()") self.current_range_idx = 0 def tell(self): return self.source.tell() def read(self, length): # This is the data stream of the map we are writing to (i.e. the new # image we are creating). target = self.destination.GetBackingStream() result = b"" # Need more data - read more. while len(result) < length: # We are done! All source ranges read. if self.current_range_idx >= len(self.source_ranges): break current_range = self.source_ranges[self.current_range_idx].data # Add a range if we are at the beginning of a range. if self.range_offset == 0: self.destination.AddRange( current_range.map_offset, # This is the current offset in the data stream. self.readptr, current_range.length, target) # Read as much data as possible from this range. to_read = min( # How much we need. length - len(result), # How much is available in this range. current_range.length - self.range_offset) # Range is exhausted - get the next range. if to_read == 0: self.current_range_idx += 1 self.range_offset = 0 continue # Read and copy the data. source_urn = self.source.targets[current_range.target_id] with self.resolver.AFF4FactoryOpen(source_urn) as source: source.Seek(current_range.target_offset + self.range_offset) data = source.Read(to_read) if not data: break result += data self.range_offset += len(data) # Keep track of all the data we have released. self.readptr += len(data) return result class AFF4Map(aff4.AFF4Stream): def __init__(self, *args, **kwargs): super(AFF4Map, self).__init__(*args, **kwargs) self.targets = [] self.target_idx_map = {} self.tree = intervaltree.IntervalTree() self.last_target = None @staticmethod def NewAFF4Map(resolver, image_urn, volume_urn): with resolver.AFF4FactoryOpen(volume_urn) as volume: # Inform the volume that we have a new image stream contained within # it. volume.children.add(image_urn) resolver.Set(image_urn, lexicon.AFF4_TYPE, rdfvalue.URN( lexicon.AFF4_MAP_TYPE)) resolver.Set(image_urn, lexicon.AFF4_STORED, rdfvalue.URN(volume_urn)) return resolver.AFF4FactoryOpen(image_urn) def deserializeMapPoint(self, data): return Range.FromSerialized(data) def LoadFromURN(self): map_urn = self.urn.Append("map") map_idx_urn = self.urn.Append("idx") # Parse the map out of the map stream. If the stream does not exist yet # we just start with an empty map. try: with self.resolver.AFF4FactoryOpen(map_idx_urn) as map_idx: self.targets = [rdfvalue.URN(utils.SmartUnicode(x)) for x in map_idx.Read(map_idx.Size()).splitlines()] with self.resolver.AFF4FactoryOpen(map_urn) as map_stream: read_length = struct.calcsize(Range.format_str) while 1: data = map_stream.Read(read_length) if not data: break range = self.deserializeMapPoint(data) if range.length > 0: self.tree.addi(range.map_offset, range.map_end, range) except IOError: pass def Read(self, length): result = b"" for interval in sorted(self.tree[self.readptr:self.readptr+length]): range = interval.data # The start of the range is ahead of us - we pad with zeros. if range.map_offset > self.readptr: padding = min(length, range.map_offset - self.readptr) result += b"\x00" * padding self.readptr += padding length -= padding if length == 0: break target = self.targets[range.target_id] length_to_read_in_target = min(length, range.map_end - self.readptr) try: with self.resolver.AFF4FactoryOpen(target) as target_stream: target_stream.Seek( range.target_offset_at_map_offset(self.readptr)) buffer = target_stream.Read(length_to_read_in_target) assert len(buffer) == length_to_read_in_target result += buffer except IOError: LOGGER.debug("*** Stream %s not found. Substituting zeros. ***", target_stream) result += b"\x00" * length_to_read_in_target finally: length -= length_to_read_in_target self.readptr += length_to_read_in_target if result: return result return b"\x00" * length def Size(self): return self.tree.end() def AddRange(self, map_offset, target_offset, length, target): """Add a new mapping range.""" rdfvalue.AssertURN(target) self.last_target = target target_id = self.target_idx_map.get(target) if target_id is None: target_id = self.target_idx_map[target] = len(self.targets) self.targets.append(target) range = Range(map_offset, length, target_offset, target_id) # Try to merge with the left interval. left_interval = self.tree[range.map_offset-1] if left_interval: left_interval = left_interval.pop() try: range = range.Merge(left_interval.data) except ValueError: left_range = left_interval.data.right_clip(range.map_offset) # If the interval has not changed, then adding it to three will # not result in an additional interval (since the tree tries to # de-dup intervals). Therefore we will end up removing the # interval completely below. Therefore if clipping the interval # does not change it, we must discard the interval completely. if left_range == left_interval.data: left_interval = None else: self.tree.addi( left_range.map_offset, left_range.map_end, left_range) # Try to merge with the right interval. right_interval = self.tree[range.map_end+1] if right_interval: right_interval = right_interval.pop() try: range = range.Merge(right_interval.data) except ValueError: right_range = right_interval.data.left_clip(range.map_end) if right_range == right_interval.data: right_interval = None else: self.tree.addi( right_range.map_offset, right_range.map_end, right_range) # Remove the left and right intervals now. This must be done at this # point to allow for the case where left interval == right interval # (i.e. the same interval intersects both start and end). if left_interval: self.tree.remove(left_interval) if right_interval and right_interval != left_interval: self.tree.remove(right_interval) # Remove any intervals inside this range. self.tree.remove_envelop(range.map_offset, range.map_end) # Add the new interval. if range.length > 0: self.tree[range.map_offset:range.map_end] = range self.MarkDirty() def Flush(self): if self.IsDirty(): # Get the volume we are stored on. volume_urn = self.resolver.Get(self.urn, lexicon.AFF4_STORED) with self.resolver.AFF4FactoryOpen(volume_urn) as volume: with volume.CreateMember(self.urn.Append("map")) as map_stream: for interval in self.tree: map_stream.Write(interval.data.Serialize()) self.resolver.Close(map_stream) with volume.CreateMember(self.urn.Append("idx")) as idx_stream: idx_stream.Write(b"\n".join( [x.SerializeToString() for x in self.targets])) self.resolver.Close(idx_stream) for target in self.targets: with self.resolver.AFF4FactoryOpen(target) as stream: pass self.resolver.Close(stream) return super(AFF4Map, self).Flush() def WriteStream(self, source, progress=None): data_stream_urn = self.GetBackingStream() with self.resolver.AFF4FactoryOpen(data_stream_urn) as data_stream: # If we write from another map we need to wrap the map in the # helper, otherwise we just copy the source into our data stream and # create a single range over the whole stream. if isinstance(source, AFF4Map): data_stream.WriteStream( _MapStreamHelper(self.resolver, source, self), progress) else: data_stream.WriteStream(source, progress) # Add a single range to cover the bulk of the image. self.AddRange(0, data_stream.Size(), data_stream.Size(), data_stream.urn) def GetBackingStream(self): """Returns the URN of the backing data stream of this map.""" if self.targets: target = self.last_target else: target = self.urn.Append("data") try: with self.resolver.AFF4FactoryOpen(target) as stream: # Backing stream is fine - just use it. return stream.urn except IOError: # If the backing stream does not already exist, we make one. volume_urn = self.resolver.Get(self.urn, lexicon.AFF4_STORED) compression_urn = self.resolver.Get( target, lexicon.AFF4_IMAGE_COMPRESSION) LOGGER.info("Stream will be compressed with %s", compression_urn) # If the stream should not be compressed, it is more efficient to # use a native volume member (e.g. ZipFileSegment or # FileBackedObjects) than the more complex bevy based images. if compression_urn == lexicon.AFF4_IMAGE_COMPRESSION_STORED: with self.resolver.AFF4FactoryOpen(volume_urn) as volume: with volume.CreateMember(target) as member: return member.urn with aff4_image.AFF4Image.NewAFF4Image( self.resolver, target, volume_urn) as stream: return stream.urn def Write(self, data): self.MarkDirty() target = self.GetBackingStream() with self.resolver.AFF4FactoryOpen(target) as stream: self.AddRange(self.readptr, stream.Size(), len(data), target) # Append the data on the end of the stream. stream.Seek(stream.Size()) stream.Write(data) self.readptr += len(data) return len(data) def GetRanges(self): return sorted([x.data for x in self.tree]) def Clear(self): self.targets = [] self.target_idx_map.clear() self.tree.clear() # Rekall/libAFF4 accidentally swapped the struct in Evimetry's update map class ScudetteAFF4Map(AFF4Map): def deserializeMapPoint(self, data): # swap them back range = Range.FromSerialized(data) return Range(range[0], range[2], range[1], range[3]) class AFF4Map2(AFF4Map): def LoadFromURN(self): map_urn = self.urn.Append("map") map_idx_urn = self.urn.Append("idx") # Parse the map out of the map stream. If the stream does not exist yet # we just start with an empty map. try: with self.resolver.AFF4FactoryOpen(map_idx_urn) as map_idx: self.targets = [rdfvalue.URN(utils.SmartUnicode(x)) for x in map_idx.Read(map_idx.Size()).splitlines()] with self.resolver.AFF4FactoryOpen(map_urn) as map_stream: format_str = " 0: self.tree.addi(range.map_offset, range.map_end, range) lastUpperOffset = upperOffset lastLowerOffset = lowerOffset lastLength = length lastTarget = target range = Range.FromList([lastUpperOffset, lastLength, lastLowerOffset, lastTarget]) if range.length > 0: self.tree.addi(range.map_offset, range.map_end, range) except IOError: pass registry.AFF4_TYPE_MAP[lexicon.AFF4_MAP_TYPE] = AFF4Map2 registry.AFF4_TYPE_MAP[lexicon.AFF4_LEGACY_MAP_TYPE] = AFF4Map registry.AFF4_TYPE_MAP[lexicon.AFF4_SCUDETTE_MAP_TYPE] = ScudetteAFF4Map pyaff4-0.26.post6/pyaff4/_version.py0000664000175000017500000000641713211617552017602 0ustar rhertzogrhertzog # Machine Generated - do not edit! # This file is produced when the main "version.py update" command is run. That # command copies this file to all sub-packages which contain # setup.py. Configuration is maintain in version.yaml at the project's top # level. def get_versions(): return tag_version_data(raw_versions(), """version.yaml""") def raw_versions(): return json.loads(""" { "post": "6", "version": "0.26", "rc": "0" } """) import json import os import subprocess try: # We are looking for the git repo which contains this file. MY_DIR = os.path.dirname(os.path.abspath(__file__)) except: MY_DIR = None def is_tree_dirty(): try: return bool(subprocess.check_output( ["git", "diff", "--name-only"], stderr=subprocess.PIPE, cwd=MY_DIR, ).splitlines()) except (OSError, subprocess.CalledProcessError): return False def get_version_file_path(version_file="version.yaml"): try: return os.path.join(subprocess.check_output( ["git", "rev-parse", "--show-toplevel"], stderr=subprocess.PIPE, cwd=MY_DIR, ).decode("utf-8").strip(), version_file) except (OSError, subprocess.CalledProcessError): return None def number_of_commit_since(version_file="version.yaml"): """Returns the number of commits since version.yaml was changed.""" try: last_commit_to_touch_version_file = subprocess.check_output( ["git", "log", "--no-merges", "-n", "1", "--pretty=format:%H", version_file], cwd=MY_DIR, stderr=subprocess.PIPE, ).strip() all_commits = subprocess.check_output( ["git", "log", "--no-merges", "-n", "1000", "--pretty=format:%H"], stderr=subprocess.PIPE, cwd=MY_DIR, ).splitlines() return all_commits.index(last_commit_to_touch_version_file) except (OSError, subprocess.CalledProcessError, ValueError): return None def get_current_git_hash(): try: return subprocess.check_output( ["git", "log", "--no-merges", "-n", "1", "--pretty=format:%H"], stderr=subprocess.PIPE, cwd=MY_DIR, ).strip() except (OSError, subprocess.CalledProcessError): return None def tag_version_data(version_data, version_path="version.yaml"): current_hash = get_current_git_hash() # Not in a git repository. if current_hash is None: version_data["error"] = "Not in a git repository." else: version_data["revisionid"] = current_hash version_data["dirty"] = is_tree_dirty() version_data["dev"] = number_of_commit_since( get_version_file_path(version_path)) # Format the version according to pep440: pep440 = version_data["version"] if int(version_data.get("post", 0)) > 0: pep440 += ".post" + version_data["post"] elif int(version_data.get("rc", 0)) > 0: pep440 += ".rc" + version_data["rc"] if version_data.get("dev", 0): # A Development release comes _before_ the main release. last = version_data["version"].rsplit(".", 1) version_data["version"] = "%s.%s" % (last[0], int(last[1]) + 1) pep440 = version_data["version"] + ".dev" + str(version_data["dev"]) version_data["pep440"] = pep440 return version_data pyaff4-0.26.post6/pyaff4/test.sh0000775000175000017500000000007213211617552016711 0ustar rhertzogrhertzog#!/bin/bash python -m unittest discover -p '*test.py' -v pyaff4-0.26.post6/pyaff4/aff4_directory_test.py0000664000175000017500000000513513211617552021715 0ustar rhertzogrhertzogfrom __future__ import unicode_literals # Copyright 2015 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. import unittest from pyaff4 import aff4_directory from pyaff4 import aff4_utils from pyaff4 import data_store from pyaff4 import lexicon from pyaff4 import rdfvalue from pyaff4 import plugins class AFF4DirectoryTest(unittest.TestCase): root_path = "/tmp/aff4_directory/" segment_name = "Foobar.txt" def tearDown(self): aff4_utils.RemoveDirectory(self.root_path) def setUp(self): with data_store.MemoryDataStore() as resolver: root_urn = rdfvalue.URN.NewURNFromFilename(self.root_path) resolver.Set(root_urn, lexicon.AFF4_STREAM_WRITE_MODE, rdfvalue.XSDString("truncate")) with aff4_directory.AFF4Directory.NewAFF4Directory( resolver, root_urn) as volume: segment_urn = volume.urn.Append(self.segment_name) with volume.CreateMember(segment_urn) as member: member.Write(b"Hello world") resolver.Set( member.urn, lexicon.AFF4_STREAM_ORIGINAL_FILENAME, rdfvalue.XSDString(self.root_path + self.segment_name)) def testCreateMember(self): with data_store.MemoryDataStore() as resolver: root_urn = rdfvalue.URN.NewURNFromFilename(self.root_path) with aff4_directory.AFF4Directory.NewAFF4Directory( resolver, root_urn) as directory: # Check for member. child_urn = directory.urn.Append(self.segment_name) with resolver.AFF4FactoryOpen(child_urn) as child: self.assertEquals(child.Read(10000), b"Hello world") # Check that the metadata is carried over. filename = resolver.Get( child_urn, lexicon.AFF4_STREAM_ORIGINAL_FILENAME) self.assertEquals(filename, self.root_path + self.segment_name) if __name__ == '__main__': #logging.getLogger().setLevel(logging.DEBUG) unittest.main() pyaff4-0.26.post6/pyaff4/utils.py0000664000175000017500000000343313211617552017111 0ustar rhertzogrhertzog"""Some utility functions.""" from __future__ import unicode_literals __author__ = "Michael Cohen " import six from future import types def SmartStr(string, encoding="utf8"): """Forces the string to be an encoded byte string.""" if six.PY3: if isinstance(string, str): return string.encode(encoding, "ignore") elif isinstance(string, bytes): return string elif hasattr(string, "__bytes__"): return string.__bytes__() return str(string).encode(encoding) if six.PY2: if type(string) is str: return string elif type(string) is unicode: return string.encode(encoding) elif hasattr(string, "__bytes__"): return string.__bytes__() return unicode(string).encode(encoding) def SmartUnicode(string, encoding="utf8"): """Forces the string into a unicode object.""" if six.PY3: if isinstance(string, bytes): return string.decode(encoding) # Call the object's __str__ method which should return an unicode # object. return str(string) elif six.PY2: if isinstance(string, str): return string.decode(encoding) return unicode(string) def AssertStr(string): if six.PY3: if type(string) is not bytes: raise RuntimeError("String must be bytes.") elif six.PY2: if type(string) not in (str, types.newstr): raise RuntimeError("String must be bytes.") def AssertUnicode(string): if six.PY3: if type(string) is not str: raise RuntimeError("String must be unicode.") elif six.PY2: if type(string) not in (unicode, types.newstr): raise RuntimeError("String must be unicode.") pyaff4-0.26.post6/pyaff4/registry.py0000664000175000017500000000143013211617552017614 0ustar rhertzogrhertzog# Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. """Various registries.""" from __future__ import unicode_literals # Global registry for AFF4 object implementations. AFF4_TYPE_MAP = {} # Registry for RDF type implementations. RDF_TYPE_MAP = {} pyaff4-0.26.post6/pyaff4/struct_parser.py0000664000175000017500000000476613211617552020663 0ustar rhertzogrhertzog"""An implementation of a struct parser which is fast and convenient.""" from __future__ import unicode_literals from builtins import zip from builtins import object import six import struct from pyaff4 import utils format_string_map = dict( uint64_t="Q", int64_t="q", uint32_t="I", uint16_t="H", int32_t="i", int16_t="h", ) class BaseParser(object): __slots__ = ("_data", "_fields", "_name", "_format_string", "_defaults") def __init__(self, data=None, **kwargs): if data is None: self._data = self._defaults[:] else: self._data = list( struct.unpack_from(self._format_string, data)) if kwargs: for k, v in list(kwargs.items()): setattr(self, k, v) def __str__(self): result = ["Struct %s" % self._name] for field, data in zip(self._fields, self._data): result.append(" %s: %s" % (field, data)) return "\n".join(result) def Pack(self): return struct.pack(self._format_string, *self._data) @classmethod def sizeof(cls): return struct.calcsize(cls._format_string) def CreateStruct(struct_name, definition): fields = [] format_string = ["<"] defaults = [] for line in definition.splitlines(): line = line.strip(" ;") components = line.split() if len(components) >= 2: type_format_char = format_string_map.get(components[0]) name = components[1] if type_format_char is None: raise RuntimeError("Invalid definition %r" % line) try: if components[2] != "=": raise RuntimeError("Invalid definition %r" % line) defaults.append(int(components[3], 0)) except IndexError: defaults.append(0) format_string.append(type_format_char) fields.append(name) properties = dict( _format_string="".join(format_string), _fields=fields, _defaults=defaults, _name=struct_name) # Make accessors for all fields. for i, field in enumerate(fields): def setx(self, value, i=i): self._data[i] = value def getx(self, i=i): return self._data[i] properties[field] = property(getx, setx) if six.PY2: return type(utils.SmartStr(struct_name), (BaseParser,), properties) else: return type(utils.SmartUnicode(struct_name), (BaseParser,), properties) pyaff4-0.26.post6/pyaff4/__init__.py0000664000175000017500000000120413211617552017502 0ustar rhertzogrhertzogfrom __future__ import unicode_literals from ._version import get_versions __version__ = get_versions()['pep440'] # Add dummy imports for pyinstaller. These should probably belong in # future since they are needed for pyinstaller to properly handle # future.standard_library.install_aliases(). See # https://github.com/google/rekall/issues/303 if 0: import UserList import UserString import UserDict import itertools import collections import future.backports.misc import commands import base64 import __buildin__ import math import reprlib import functools import re import subprocess pyaff4-0.26.post6/pyaff4/aff4_imager_utils.py0000664000175000017500000000134613211617552021336 0ustar rhertzogrhertzog# Copyright 2015 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. """Utilities for AFF4 imaging. These are mostly high level utilities used by the command line imager. """ from __future__ import unicode_literals pyaff4-0.26.post6/pyaff4/linear_hasher.py0000664000175000017500000001011213211617552020545 0ustar rhertzogrhertzogfrom __future__ import absolute_import from __future__ import unicode_literals # Copyright 2016,2017 Schatz Forensic Pty Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from builtins import object import hashlib import rdflib from pyaff4 import block_hasher from pyaff4 import container from pyaff4 import data_store from pyaff4 import hashes from pyaff4 import lexicon from pyaff4 import rdfvalue from pyaff4 import zip class LinearHasher(object): def __init__(self, listener=None): if listener == None: self.listener = block_hasher.ValidationListener() else: self.listener = listener self.delegate = None def hash(self, urn, mapURI, hashDataType): lex = container.Container.identifyURN(urn) resolver = data_store.MemoryDataStore(lex) with zip.ZipFile.NewZipFile(resolver, urn) as zip_file: if lex == lexicon.standard: self.delegate = InterimStdLinearHasher(resolver, lex, self.listener) elif lex == lexicon.legacy: self.delegate = PreStdLinearHasher(resolver, lex, self.listener) elif lex == lexicon.scudette: self.delegate = ScudetteLinearHasher(resolver, lex, self.listener) else: raise ValueError return self.delegate.doHash(mapURI, hashDataType) def hashMulti(self, urna, urnb, mapURI, hashDataType): lex = container.Container.identifyURN(urna) resolver = data_store.MemoryDataStore(lex) with zip.ZipFile.NewZipFile(resolver, urna) as zip_filea: with zip.ZipFile.NewZipFile(resolver, urnb) as zip_fileb: if lex == lexicon.standard: self.delegate = InterimStdLinearHasher(resolver, lex, self.listener) elif lex == lexicon.legacy: self.delegate = PreStdLinearHasher(resolver, lex, self.listener) else: raise ValueError return self.delegate.doHash(mapURI, hashDataType) def doHash(self, mapURI, hashDataType): hash = hashes.new(hashDataType) if not self.isMap(mapURI): import pdb; pdb.set_trace() if self.isMap(mapURI): with self.resolver.AFF4FactoryOpen(mapURI) as mapStream: remaining = mapStream.Size() count = 0 while remaining > 0: toRead = min(32*1024, remaining) data = mapStream.Read(toRead) assert len(data) == toRead remaining -= len(data) hash.update(data) count = count + 1 b = hash.hexdigest() return hashes.newImmutableHash(b, hashDataType) raise Exception("IllegalState") def isMap(self, stream): for type in self.resolver.QuerySubjectPredicate(stream, lexicon.AFF4_TYPE): if self.lexicon.map == type: return True return False class PreStdLinearHasher(LinearHasher): def __init__(self, resolver, lex, listener=None): LinearHasher.__init__(self, listener) self.lexicon = lex self.resolver = resolver class InterimStdLinearHasher(LinearHasher): def __init__(self, resolver, lex, listener=None): LinearHasher.__init__(self, listener) self.lexicon = lex self.resolver = resolver class ScudetteLinearHasher(LinearHasher): def __init__(self, resolver, lex, listener=None): LinearHasher.__init__(self, listener) self.lexicon = lex self.resolver = resolver pyaff4-0.26.post6/pyaff4/container_test.py0000664000175000017500000000075213211617552020773 0ustar rhertzogrhertzogfrom __future__ import print_function from __future__ import unicode_literals import os import unittest from pyaff4 import container from pyaff4 import hashing_test class ContainerTest(unittest.TestCase): @hashing_test.conditional_on_images def testOpen(self): fd = container.Container.open(hashing_test.stdLinear) self.assertEqual(fd.urn, u"aff4://fcbfdce7-4488-4677-abf6-08bc931e195b") if __name__ == '__main__': unittest.main() pyaff4-0.26.post6/pyaff4/plugins.py0000664000175000017500000000154713211617552017436 0ustar rhertzogrhertzogfrom __future__ import unicode_literals # Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from pyaff4 import aff4 from pyaff4 import aff4_directory try: from pyaff4 import aff4_cloud except ImportError: pass from pyaff4 import aff4_file from pyaff4 import aff4_image from pyaff4 import aff4_map from pyaff4 import zip pyaff4-0.26.post6/pyaff4/container.py0000664000175000017500000001122113211617552017725 0ustar rhertzogrhertzogfrom __future__ import print_function from __future__ import absolute_import from __future__ import unicode_literals # Copyright 2016,2017 Schatz Forensic Pty Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. from builtins import next from builtins import str from builtins import object from pyaff4 import data_store from pyaff4 import hashes from pyaff4 import lexicon from pyaff4 import aff4_map from pyaff4 import rdfvalue from pyaff4 import aff4 import yaml from pyaff4 import zip localcache = {} class Container(object): def __init__(self): pass @staticmethod def identify(filename): """Public method to identify a filename as an AFF4 container.""" return Container.identifyURN(rdfvalue.URN.FromFileName(filename)) @staticmethod def identifyURN(urn): resolver = data_store.MemoryDataStore(lexicon.standard) with zip.ZipFile.NewZipFile(resolver, urn) as zip_file: if len(list(zip_file.members.keys())) == 0: # it's a new zipfile raise IOError("Not an AFF4 Volume") try: # AFF4 Std v1.0 introduced the version file version = zip_file.OpenZipSegment("version.txt") resolver.Close(version) return lexicon.standard except: if str(resolver.aff4NS) == lexicon.AFF4_NAMESPACE: # Rekall defined the new AFF4 namespace post the Wirespeed paper return lexicon.scudette else: # Wirespeed (Evimetry) 1.x and Evimetry 2.x stayed with the original namespace return lexicon.legacy def isMap(self, stream): types = self.resolver.QuerySubjectPredicate(stream, lexicon.AFF4_TYPE) if self.lexicon.map in types: return True return False @staticmethod def open(filename): """Public method to open a filename as an AFF4 container.""" return Container.openURN(rdfvalue.URN.FromFileName(filename)) @staticmethod def openURN(urn): try: cached = localcache[urn] return cached except: lex = Container.identifyURN(urn) resolver = data_store.MemoryDataStore(lex) with zip.ZipFile.NewZipFile(resolver, urn) as zip_file: if lex == lexicon.standard: image = next(resolver.QueryPredicateObject(lexicon.AFF4_TYPE, lex.Image)) datastreams = list(resolver.QuerySubjectPredicate(image, lex.dataStream)) for stream in datastreams: if lex.map in resolver.QuerySubjectPredicate(stream, lexicon.AFF4_TYPE): res = resolver.AFF4FactoryOpen(stream) localcache[urn] = res res.parent = aff4.Image(resolver, urn=image) return res elif lex == lexicon.scudette: m = next(resolver.QueryPredicateObject(lexicon.AFF4_TYPE, lex.map)) cat = next(resolver.QuerySubjectPredicate(m, lex.category)) if cat == lex.memoryPhysical: res = resolver.AFF4FactoryOpen(m) localcache[urn] = res res.parent = aff4.Image(resolver, urn=m) legacyYamlInfoURI = res.urn.Append("information.yaml") with resolver.AFF4FactoryOpen(legacyYamlInfoURI) as fd: txt = fd.read(10000000) dt = yaml.safe_load(txt) try: CR3 = dt["Registers"]["CR3"] resolver.Add(res.parent.urn, lexicon.standard.memoryPageTableEntryOffset, rdfvalue.XSDInteger(CR3)) kaslr_slide = dt["kaslr_slide"] resolver.Add(res.parent.urn, lexicon.standard.OSXKALSRSlide, rdfvalue.XSDInteger(kaslr_slide)) except: pass return res pyaff4-0.26.post6/pyaff4.egg-info/0000755000175000017500000000000013552346257017075 5ustar rhertzogrhertzogpyaff4-0.26.post6/pyaff4.egg-info/top_level.txt0000664000175000017500000000000713211617576021623 0ustar rhertzogrhertzogpyaff4 pyaff4-0.26.post6/pyaff4.egg-info/SOURCES.txt0000664000175000017500000000167213211617576020766 0ustar rhertzogrhertzogMANIFEST.in README.txt setup.py pyaff4/__init__.py pyaff4/_version.py pyaff4/aff4.py pyaff4/aff4_cloud.py pyaff4/aff4_directory.py pyaff4/aff4_directory_test.py pyaff4/aff4_file.py pyaff4/aff4_image.py pyaff4/aff4_image_test.py pyaff4/aff4_imager_utils.py pyaff4/aff4_map.py pyaff4/aff4_map_test.py pyaff4/aff4_utils.py pyaff4/block_hasher.py pyaff4/container.py pyaff4/container_test.py pyaff4/data_store.py pyaff4/data_store_test.py pyaff4/hashes.py pyaff4/hashing_test.py pyaff4/lexicon.py pyaff4/linear_hasher.py pyaff4/plugins.py pyaff4/rdfvalue.py pyaff4/rdfvalue_test.py pyaff4/registry.py pyaff4/standards_test.py pyaff4/stream_factory.py pyaff4/stream_test.py pyaff4/struct_parser.py pyaff4/symbolic_streams.py pyaff4/test.sh pyaff4/test_memory.py pyaff4/utils.py pyaff4/zip.py pyaff4/zip_test.py pyaff4.egg-info/PKG-INFO pyaff4.egg-info/SOURCES.txt pyaff4.egg-info/dependency_links.txt pyaff4.egg-info/requires.txt pyaff4.egg-info/top_level.txtpyaff4-0.26.post6/pyaff4.egg-info/PKG-INFO0000664000175000017500000000440213211617576020171 0ustar rhertzogrhertzogMetadata-Version: 1.0 Name: pyaff4 Version: 0.26.post6 Summary: Python Advanced Forensic Format Version 4 library. Home-page: https://www.aff4.org/ Author: Michael Cohen Author-email: scudette@gmail.com License: UNKNOWN Description-Content-Type: UNKNOWN Description: # AFF4 -The Advanced Forensics File Format The Advanced Forensics File Format 4 (AFF4) is an open source format used for the storage of digital evidence and data. It was originally designed and published in [1] and has since been standardised as the AFF4 Standard v1.0, which is available at https://github.com/aff4/Standard . This project is a work in progress implementation, providing two library implementations, C/C++ and Python. ## What is currently supported. The focus of this implementation at present is reading images conforming with the AFF4 Standard v1.0. Canonical images are provided in the AFF4 Reference Images github project at https://github.com/aff4/ReferenceImages 1. Reading ZipFile style volumes. 2. Reading AFF4 Image streams using the deflate or snappy compressor. 3. Reading RDF metadata using Turtle (and in some instances YAML for backwards compatibility). ## What is not yet supported. The write support in the libraries is currently broken and being worked on. Other aspects of the AFF4 that have not yet been implemented in this codebase include: 1. Encrypted AFF4 volumes. 2. Persistent data store. 3. HTTP backed streams. 4. Splitting an AFF4 Image across multiple volumes. 5. Map streams. 6. Support for signed statements or Bill of Materials. 7. Logical file acquisition. # Notice This is not an official Google product (experimental or otherwise), it is just code that happens to be owned by Google. # References [1] "Extending the advanced forensic format to accommodate multiple data sources, logical evidence, arbitrary information and forensic workflow" M.I. Cohen, Simson Garfinkel and Bradley Schatz, digital investigation 6 (2009) S57–S68. Platform: UNKNOWN pyaff4-0.26.post6/pyaff4.egg-info/dependency_links.txt0000664000175000017500000000000113211617576023142 0ustar rhertzogrhertzog pyaff4-0.26.post6/pyaff4.egg-info/requires.txt0000664000175000017500000000022313211617576021471 0ustar rhertzogrhertzogfuture aff4-snappy==0.5.1 rdflib[sparql]==4.2.2 intervaltree==2.1.0 pyblake2==0.9.3 expiringdict==1.1.4 html5lib [cloud] google-api-python-client pyaff4-0.26.post6/setup.py0000664000175000017500000000372413211617552015723 0ustar rhertzogrhertzog# Copyright 2014 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of # the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. """This module installs the pyaff4 library.""" from setuptools import setup from setuptools.command.test import test as TestCommand try: with open('../README.md') as file: long_description = file.read() except IOError: long_description = "" ENV = {"__file__": __file__} exec(open("pyaff4/_version.py").read(), ENV) VERSION = ENV["get_versions"]() class NoseTestCommand(TestCommand): def finalize_options(self): TestCommand.finalize_options(self) self.test_args = [] self.test_suite = True def run_tests(self): # Run nose ensuring that argv simulates running nosetests directly import nose nose.run_exit(argv=['nosetests']) commands = {} commands["test"] = NoseTestCommand setup( name='pyaff4', long_description=long_description, version=VERSION["pep440"], cmdclass=commands, description='Python Advanced Forensic Format Version 4 library.', author='Michael Cohen', author_email='scudette@gmail.com', url='https://www.aff4.org/', packages=['pyaff4'], package_dir={"pyaff4": "pyaff4"}, install_requires=[ "future", "aff4-snappy == 0.5.1", "rdflib[sparql] == 4.2.2", "intervaltree == 2.1.0", "pyblake2 == 0.9.3", "expiringdict == 1.1.4", "html5lib", ], extras_require=dict( cloud="google-api-python-client" ) ) pyaff4-0.26.post6/MANIFEST.in0000664000175000017500000000013513211617552015740 0ustar rhertzogrhertzoginclude README.txt recursive-include pyaff4 * recursive-exclude * *.pyc exclude .gitignore pyaff4-0.26.post6/PKG-INFO0000664000175000017500000000440213211617576015306 0ustar rhertzogrhertzogMetadata-Version: 1.0 Name: pyaff4 Version: 0.26.post6 Summary: Python Advanced Forensic Format Version 4 library. Home-page: https://www.aff4.org/ Author: Michael Cohen Author-email: scudette@gmail.com License: UNKNOWN Description-Content-Type: UNKNOWN Description: # AFF4 -The Advanced Forensics File Format The Advanced Forensics File Format 4 (AFF4) is an open source format used for the storage of digital evidence and data. It was originally designed and published in [1] and has since been standardised as the AFF4 Standard v1.0, which is available at https://github.com/aff4/Standard . This project is a work in progress implementation, providing two library implementations, C/C++ and Python. ## What is currently supported. The focus of this implementation at present is reading images conforming with the AFF4 Standard v1.0. Canonical images are provided in the AFF4 Reference Images github project at https://github.com/aff4/ReferenceImages 1. Reading ZipFile style volumes. 2. Reading AFF4 Image streams using the deflate or snappy compressor. 3. Reading RDF metadata using Turtle (and in some instances YAML for backwards compatibility). ## What is not yet supported. The write support in the libraries is currently broken and being worked on. Other aspects of the AFF4 that have not yet been implemented in this codebase include: 1. Encrypted AFF4 volumes. 2. Persistent data store. 3. HTTP backed streams. 4. Splitting an AFF4 Image across multiple volumes. 5. Map streams. 6. Support for signed statements or Bill of Materials. 7. Logical file acquisition. # Notice This is not an official Google product (experimental or otherwise), it is just code that happens to be owned by Google. # References [1] "Extending the advanced forensic format to accommodate multiple data sources, logical evidence, arbitrary information and forensic workflow" M.I. Cohen, Simson Garfinkel and Bradley Schatz, digital investigation 6 (2009) S57–S68. Platform: UNKNOWN pyaff4-0.26.post6/README.txt0000664000175000017500000000225313211617552015703 0ustar rhertzogrhertzog# AFF4 -The Advanced Forensics File Format The Advanced Forensics File format 4 was originally designed and published in "Extending the advanced forensic format to accommodate multiple data sources, logical evidence, arbitrary information and forensic workflow" M.I. Cohen, Simson Garfinkel and Bradley Schatz, digital investigation 6 (2009) S57–S68. The format is an open source format used for the storage of digital evidence and data. The original paper was released with an earlier implementation written in python. This project is a complete open source re-implementation for a general purpose AFF4 library. ## What is currently supported. 1. Reading ZipFile style volumes. 2. Reading striped ZipFile volumes. 2. Reading AFF4 Image streams using the deflate or snappy compressor. 3. Reading RDF metadata using both YAML and Turtle. What is not yet supported: 1. Writing 2. Encrypted AFF4 volumes. 3. Persistent data store. 4. HTTP backed streams. 5. Support for signed statements or Bill of Materials. 6. Logical file acquisition. # Notice This is not an official Google product (experimental or otherwise), it is just code that happens to be owned by Google and Schatz Forensic.pyaff4-0.26.post6/setup.cfg0000664000175000017500000000004613211617576016032 0ustar rhertzogrhertzog[egg_info] tag_build = tag_date = 0