whisper-0.9.15/0000755000000000000000000000000012626126470013233 5ustar rootroot00000000000000whisper-0.9.15/bin/0000755000000000000000000000000012626126470014003 5ustar rootroot00000000000000whisper-0.9.15/bin/whisper-diff.py0000644000000000000000000000423212626124251016740 0ustar rootroot00000000000000#!/usr/bin/python -tt import sys import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') option_parser = optparse.OptionParser(usage='''%prog [options] path_a path_b''') option_parser.add_option('--summary', default=False, action='store_true', help="show summary of differences") option_parser.add_option('--ignore-empty', default=False, action='store_true', help="skip comparison if either value is undefined") option_parser.add_option('--columns', default=False, action='store_true', help="print output in simple columns") option_parser.add_option('--no-headers', default=False, action='store_true', help="do not print column headers") (options, args) = option_parser.parse_args() if len(args) != 2: option_parser.print_help() sys.exit(1) (path_a,path_b) = args[0::1] def print_diffs(diffs,pretty=True,headers=True): if pretty: h = "%7s %11s %13s %13s\n" f = "%7s %11d %13s %13s\n" else: h = "%s %s %s %s\n" f = "%s %d %s %s\n" if headers: sys.stdout.write(h%('archive','timestamp','value_a','value_b')) for archive, points, total in diffs: count = count=points.__len__() if pretty: sys.stdout.write('Archive %d (%d of %d datapoints differ)\n'%(archive,points.__len__(),total)) sys.stdout.write(h%('','timestamp','value_a','value_b')) for p in points: if pretty: sys.stdout.write(f%('',p[0],p[1],p[2])) else: sys.stdout.write(f%(archive,p[0],p[1],p[2])) def print_summary(diffs,pretty=True,headers=True): if pretty: f = "%7s %9s %9s\n" else: f = "%s %s %s\n" if headers: sys.stdout.write(f%('archive','total','differing')) for archive, points, total in diffs: sys.stdout.write(f%(archive,total,points.__len__())) 
archive_diffs = whisper.diff(path_a,path_b,ignore_empty=options.ignore_empty) if options.summary: print_summary(archive_diffs,pretty=(not options.columns),headers=(not options.no_headers)) else: print_diffs(archive_diffs,pretty=(not options.columns),headers=(not options.no_headers))whisper-0.9.15/bin/whisper-set-aggregation-method.py0000755000000000000000000000163312626124253022375 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser( usage='%%prog path <%s>' % '|'.join(whisper.aggregationMethods)) (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_usage() sys.exit(1) path = args[0] aggregationMethod = args[1] try: oldAggregationMethod = whisper.setAggregationMethod(path, aggregationMethod) except IOError, exc: sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % path) option_parser.print_usage() sys.exit(1) except whisper.WhisperException, exc: raise SystemExit('[ERROR] %s' % str(exc)) print 'Updated aggregation method: %s (%s -> %s)' % (path,oldAggregationMethod,aggregationMethod) whisper-0.9.15/bin/whisper-fetch.py0000755000000000000000000000337012626124253017130 0ustar rootroot00000000000000#!/usr/bin/env python import sys import time import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int( time.time() ) yesterday = now - (60 * 60 * 24) option_parser = optparse.OptionParser(usage='''%prog [options] path''') option_parser.add_option('--from', default=yesterday, type='int', dest='_from', help=("Unix epoch time of the beginning of " "your requested interval (default: 24 hours ago)")) 
option_parser.add_option('--until', default=now, type='int', help="Unix epoch time of the end of your requested interval (default: now)") option_parser.add_option('--json', default=False, action='store_true', help="Output results in JSON form") option_parser.add_option('--pretty', default=False, action='store_true', help="Show human-readable timestamps instead of unix times") (options, args) = option_parser.parse_args() if len(args) != 1: option_parser.print_usage() sys.exit(1) path = args[0] from_time = int( options._from ) until_time = int( options.until ) try: (timeInfo, values) = whisper.fetch(path, from_time, until_time) except whisper.WhisperException, exc: raise SystemExit('[ERROR] %s' % str(exc)) (start,end,step) = timeInfo if options.json: values_json = str(values).replace('None','null') print '''{ "start" : %d, "end" : %d, "step" : %d, "values" : %s }''' % (start,end,step,values_json) sys.exit(0) t = start for value in values: if options.pretty: timestr = time.ctime(t) else: timestr = str(t) if value is None: valuestr = "None" else: valuestr = "%f" % value print "%s\t%s" % (timestr,valuestr) t += step whisper-0.9.15/bin/whisper-fill.py0000755000000000000000000001031712626124253016764 0ustar rootroot00000000000000#!/usr/bin/env python # whisper-fill: unlike whisper-merge, don't overwrite data that's # already present in the target file, but instead, only add the missing # data (e.g. where the gaps in the target file are). Because no values # are overwritten, no data or precision gets lost. Also, unlike # whisper-merge, try to take the highest-precision archive to provide # the data, instead of the one with the largest retention. 
# Using this script, reconciliation between two replica instances can be # performed by whisper-fill-ing the data of the other replica with the # data that exists locally, without introducing the quite remarkable # gaps that whisper-merge leaves behind (filling a higher precision # archive with data from a lower precision one) # Work performed by Fabian Groffen @grobian while working at Booking.com. # additional patches are from https://github.com/jssjr/carbonate/ from whisper import info, fetch, update_many try: from whisper import operator HAS_OPERATOR = True except ImportError: HAS_OPERATOR = False import itertools import time import sys def itemgetter(*items): if HAS_OPERATOR: return operator.itemgetter(*items) else: if len(items) == 1: item = items[0] def g(obj): return obj[item] else: def g(obj): return tuple(obj[item] for item in items) return g def fill(src, dst, tstart, tstop): # fetch range start-stop from src, taking values from the highest # precision archive, thus optionally requiring multiple fetch + merges srcHeader = info(src) srcArchives = srcHeader['archives'] srcArchives.sort(key=itemgetter('retention')) # find oldest point in time, stored by both files srcTime = int(time.time()) - srcHeader['maxRetention'] if tstart < srcTime and tstop < srcTime: return # we want to retain as much precision as we can, hence we do backwards # walk in time # skip forward at max 'step' points at a time for archive in srcArchives: # skip over archives that don't have any data points rtime = time.time() - archive['retention'] if tstop <= rtime: continue untilTime = tstop fromTime = rtime if rtime > tstart else tstart (timeInfo, values) = fetch(src, fromTime, untilTime) (start, end, archive_step) = timeInfo pointsToWrite = list(itertools.ifilter( lambda points: points[1] is not None, itertools.izip(xrange(start, end, archive_step), values))) # order points by timestamp, newest first pointsToWrite.sort(key=lambda p: p[0], reverse=True) update_many(dst, pointsToWrite) 
tstop = fromTime # can stop when there's nothing to fetch any more if tstart == tstop: return def fill_archives(src, dst, startFrom): header = info(dst) archives = header['archives'] archives = sorted(archives, key=lambda t: t['retention']) for archive in archives: fromTime = time.time() - archive['retention'] if fromTime >= startFrom: continue (timeInfo, values) = fetch(dst, fromTime, startFrom) (start, end, step) = timeInfo gapstart = None for v in values: if not v and not gapstart: gapstart = start elif v and gapstart: # ignore single units lost if (start - gapstart) > archive['secondsPerPoint']: fill(src, dst, gapstart - step, start) gapstart = None elif gapstart and start == end - step: fill(src, dst, gapstart - step, start) start += step startFrom = fromTime def main(argv): if len(argv) != 2: print("usage: whisper-fill.py src dst"); print(" copies data from src in dst, if missing") if len(argv) == 1 and (argv[0].lower() == "--help" or argv[0].lower() == "-h"): sys.exit(0) sys.exit(1) src = argv[0] dst = argv[1] startFrom = time.time() fill_archives(src, dst, startFrom) if __name__ == "__main__": main(sys.argv[1:]) whisper-0.9.15/bin/whisper-merge.py0000755000000000000000000000124712626124253017137 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser( usage='''%prog [options] from_path to_path''') (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_usage() sys.exit(1) path_from = args[0] path_to = args[1] for filename in (path_from, path_to): if not os.path.exists(filename): raise SystemExit('[ERROR] File "%s" does not exist!' 
% filename) whisper.merge(path_from, path_to) whisper-0.9.15/bin/whisper-update.py0000755000000000000000000000171612626124253017323 0ustar rootroot00000000000000#!/usr/bin/env python import sys import time import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int( time.time() ) option_parser = optparse.OptionParser( usage='''%prog [options] path timestamp:value [timestamp:value]*''') (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_usage() sys.exit(1) path = args[0] datapoint_strings = args[1:] datapoint_strings = [point.replace('N:', '%d:' % now) for point in datapoint_strings] datapoints = [tuple(point.split(':')) for point in datapoint_strings] try: if len(datapoints) == 1: timestamp,value = datapoints[0] whisper.update(path, value, timestamp) else: whisper.update_many(path, datapoints) except whisper.WhisperException, exc: raise SystemExit('[ERROR] %s' % str(exc)) whisper-0.9.15/bin/whisper-info.py0000755000000000000000000000206312626124253016770 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser(usage='''%prog path [field]''') (options, args) = option_parser.parse_args() if len(args) < 1: option_parser.print_usage() sys.exit(1) path = args[0] if len(args) > 1: field = args[1] else: field = None try: info = whisper.info(path) except whisper.WhisperException, exc: raise SystemExit('[ERROR] %s' % str(exc)) info['fileSize'] = os.stat(path).st_size if field: if field not in info: print 'Unknown field "%s". 
Valid fields are %s' % (field, ','.join(info)) sys.exit(1) print info[field] sys.exit(0) archives = info.pop('archives') for key,value in info.items(): print '%s: %s' % (key,value) print for i,archive in enumerate(archives): print 'Archive %d' % i for key,value in archive.items(): print '%s: %s' % (key,value) print whisper-0.9.15/bin/whisper-resize.py0000755000000000000000000001354412626124253017344 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import math import time import bisect import signal import optparse import traceback try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int(time.time()) option_parser = optparse.OptionParser( usage='''%prog path timePerPoint:timeToStore [timePerPoint:timeToStore]* timePerPoint and timeToStore specify lengths of time, for example: 60:1440 60 seconds per datapoint, 1440 datapoints = 1 day of retention 15m:8 15 minutes per datapoint, 8 datapoints = 2 hours of retention 1h:7d 1 hour per datapoint, 7 days of retention 12h:2y 12 hours per datapoint, 2 years of retention ''') option_parser.add_option( '--xFilesFactor', default=None, type='float', help="Change the xFilesFactor") option_parser.add_option( '--aggregationMethod', default=None, type='string', help="Change the aggregation function (%s)" % ', '.join(whisper.aggregationMethods)) option_parser.add_option( '--force', default=False, action='store_true', help="Perform a destructive change") option_parser.add_option( '--newfile', default=None, action='store', help="Create a new database file without removing the existing one") option_parser.add_option( '--nobackup', action='store_true', help='Delete the .bak file after successful execution') option_parser.add_option( '--aggregate', action='store_true', help='Try to aggregate the values to fit the new archive better.' 
' Note that this will make things slower and use more memory.') (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_usage() sys.exit(1) path = args[0] if not os.path.exists(path): sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % path) option_parser.print_usage() sys.exit(1) info = whisper.info(path) new_archives = [whisper.parseRetentionDef(retentionDef) for retentionDef in args[1:]] old_archives = info['archives'] # sort by precision, lowest to highest old_archives.sort(key=lambda a: a['secondsPerPoint'], reverse=True) if options.xFilesFactor is None: xff = info['xFilesFactor'] else: xff = options.xFilesFactor if options.aggregationMethod is None: aggregationMethod = info['aggregationMethod'] else: aggregationMethod = options.aggregationMethod print 'Retrieving all data from the archives' for archive in old_archives: fromTime = now - archive['retention'] + archive['secondsPerPoint'] untilTime = now timeinfo,values = whisper.fetch(path, fromTime, untilTime) archive['data'] = (timeinfo,values) if options.newfile is None: tmpfile = path + '.tmp' if os.path.exists(tmpfile): print 'Removing previous temporary database file: %s' % tmpfile os.unlink(tmpfile) newfile = tmpfile else: newfile = options.newfile print 'Creating new whisper database: %s' % newfile whisper.create(newfile, new_archives, xFilesFactor=xff, aggregationMethod=aggregationMethod) size = os.stat(newfile).st_size print 'Created: %s (%d bytes)' % (newfile,size) if options.aggregate: # This is where data will be interpolated (best effort) print 'Migrating data with aggregation...' 
all_datapoints = [] for archive in old_archives: # Loading all datapoints into memory for fast querying timeinfo, values = archive['data'] new_datapoints = zip( range(*timeinfo), values ) if all_datapoints: last_timestamp = all_datapoints[-1][0] slice_end = 0 for i,(timestamp,value) in enumerate(new_datapoints): if timestamp > last_timestamp: slice_end = i break all_datapoints += new_datapoints[i:] else: all_datapoints += new_datapoints oldtimestamps = map( lambda p: p[0], all_datapoints) oldvalues = map( lambda p: p[1], all_datapoints) print "oldtimestamps: %s" % oldtimestamps # Simply cleaning up some used memory del all_datapoints new_info = whisper.info(newfile) new_archives = new_info['archives'] for archive in new_archives: step = archive['secondsPerPoint'] fromTime = now - archive['retention'] + now % step untilTime = now + now % step + step print "(%s,%s,%s)" % (fromTime,untilTime, step) timepoints_to_update = range(fromTime, untilTime, step) print "timepoints_to_update: %s" % timepoints_to_update newdatapoints = [] for tinterval in zip( timepoints_to_update[:-1], timepoints_to_update[1:] ): # TODO: Setting lo= parameter for 'lefti' based on righti from previous # iteration. Obviously, this can only be done if # timepoints_to_update is always updated. Is it? lefti = bisect.bisect_left(oldtimestamps, tinterval[0]) righti = bisect.bisect_left(oldtimestamps, tinterval[1], lo=lefti) newvalues = oldvalues[lefti:righti] if newvalues: non_none = filter( lambda x: x is not None, newvalues) if 1.0*len(non_none)/len(newvalues) >= xff: newdatapoints.append([tinterval[0], whisper.aggregate(aggregationMethod, non_none)]) whisper.update_many(newfile, newdatapoints) else: print 'Migrating data without aggregation...' 
for archive in old_archives: timeinfo, values = archive['data'] datapoints = zip( range(*timeinfo), values ) datapoints = filter(lambda p: p[1] is not None, datapoints) whisper.update_many(newfile, datapoints) if options.newfile is not None: sys.exit(0) backup = path + '.bak' print 'Renaming old database to: %s' % backup os.rename(path, backup) try: print 'Renaming new database to: %s' % path os.rename(tmpfile, path) except: traceback.print_exc() print '\nOperation failed, restoring backup' os.rename(backup, path) sys.exit(1) if options.nobackup: print "Unlinking backup: %s" % backup os.unlink(backup) whisper-0.9.15/bin/rrd2whisper.py0000755000000000000000000000773012626124253016637 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import time import signal import optparse try: import rrdtool except ImportError, exc: raise SystemExit('[ERROR] Missing dependency: %s' % str(exc)) try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) aggregationMethods = whisper.aggregationMethods # RRD doesn't have a 'sum' or 'total' type aggregationMethods.remove('sum') option_parser = optparse.OptionParser(usage='''%prog rrd_path''') option_parser.add_option( '--xFilesFactor', help="The xFilesFactor to use in the output file. " + "Defaults to the input RRD's xFilesFactor", default=None, type='float') option_parser.add_option( '--aggregationMethod', help="The consolidation function to fetch from on input and " + "aggregationMethod to set on output. 
One of: %s" % ', '.join(aggregationMethods), default='average', type='string') (options, args) = option_parser.parse_args() if len(args) < 1: option_parser.print_help() sys.exit(1) rrd_path = args[0] try: rrd_info = rrdtool.info(rrd_path) except rrdtool.error, exc: raise SystemExit('[ERROR] %s' % str(exc)) seconds_per_pdp = rrd_info['step'] # Reconcile old vs new python-rrdtool APIs (yuck) # leave consistent 'rras' and 'datasources' lists if 'rra' in rrd_info: rras = rrd_info['rra'] else: rra_indices = [] for key in rrd_info: if key.startswith('rra['): index = int(key.split('[')[1].split(']')[0]) rra_indices.append(index) rra_count = max(rra_indices) + 1 rras = [] for i in range(rra_count): rra_info = {} rra_info['pdp_per_row'] = rrd_info['rra[%d].pdp_per_row' % i] rra_info['rows'] = rrd_info['rra[%d].rows' % i] rra_info['cf'] = rrd_info['rra[%d].cf' % i] rra_info['xff'] = rrd_info['rra[%d].xff' % i] rras.append(rra_info) datasources = [] if 'ds' in rrd_info: datasource_names = rrd_info['ds'].keys() else: ds_keys = [key for key in rrd_info if key.startswith('ds[')] datasources = list(set(key[3:].split(']')[0] for key in ds_keys)) # Grab the archive configuration relevant_rras = [] for rra in rras: if rra['cf'] == options.aggregationMethod.upper(): relevant_rras.append(rra) if not relevant_rras: err = "[ERROR] Unable to find any RRAs with consolidation function: %s" % \ options.aggregationMethod.upper() raise SystemExit(err) archives = [] xFilesFactor = options.xFilesFactor for rra in relevant_rras: precision = rra['pdp_per_row'] * seconds_per_pdp points = rra['rows'] if not xFilesFactor: xFilesFactor = rra['xff'] archives.append((precision, points)) for datasource in datasources: now = int(time.time()) path = rrd_path.replace('.rrd', '_%s.wsp' % datasource) try: whisper.create(path, archives, xFilesFactor=xFilesFactor) except whisper.InvalidConfiguration, e: raise SystemExit('[ERROR] %s' % str(e)) size = os.stat(path).st_size archiveConfig = ','.join(["%d:%d" % ar 
for ar in archives]) print "Created: %s (%d bytes) with archives: %s" % (path, size, archiveConfig) print "Migrating data" archiveNumber = len(archives) - 1 for precision, points in reversed(archives): retention = precision * points endTime = now - now % precision startTime = endTime - retention (time_info, columns, rows) = rrdtool.fetch( rrd_path, options.aggregationMethod.upper(), '-r', str(precision), '-s', str(startTime), '-e', str(endTime)) column_index = list(columns).index(datasource) rows.pop() # remove the last datapoint because RRD sometimes gives funky values values = [row[column_index] for row in rows] timestamps = list(range(*time_info)) datapoints = zip(timestamps, values) datapoints = filter(lambda p: p[1] is not None, datapoints) print ' migrating %d datapoints from archive %d' % (len(datapoints), archiveNumber) archiveNumber -= 1 whisper.update_many(path, datapoints) whisper-0.9.15/bin/whisper-create.py0000755000000000000000000000543112626124253017302 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import signal import optparse import math try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') def byte_format(num): for x in ['bytes','KB','MB']: if num < 1024.0: return "%.3f%s" % (num, x) num /= 1024.0 return "%.3f%s" % (num, 'GB') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser( usage='''%prog path timePerPoint:timeToStore [timePerPoint:timeToStore]* %prog --estimate timePerPoint:timeToStore [timePerPoint:timeToStore]* timePerPoint and timeToStore specify lengths of time, for example: 60:1440 60 seconds per datapoint, 1440 datapoints = 1 day of retention 15m:8 15 minutes per datapoint, 8 datapoints = 2 hours of retention 1h:7d 1 hour per datapoint, 7 days of retention 12h:2y 12 hours per datapoint, 2 years of retention ''') option_parser.add_option('--xFilesFactor', default=0.5, type='float') 
option_parser.add_option('--aggregationMethod', default='average', type='string', help="Function to use when aggregating values (%s)" % ', '.join(whisper.aggregationMethods)) option_parser.add_option('--overwrite', default=False, action='store_true') option_parser.add_option('--estimate', default=False, action='store_true', help="Don't create a whisper file, estimate storage requirements based on archive definitions") (options, args) = option_parser.parse_args() if options.estimate: if len(args) == 0: option_parser.print_usage() sys.exit(1) if len(args) == 1 and args[0].find(",") > 0: args = args[0].split(",") archives = 0 total_points = 0 for (precision, points) in map(whisper.parseRetentionDef, args): print "Archive %s: %s points of %ss precision" % (archives, points, precision) archives += 1 total_points += points size = 16 + (archives * 12) + (total_points * 12) disk_size = int(math.ceil(size / 4096.0) * 4096) print "\nEstimated Whisper DB Size: %s (%s bytes on disk with 4k blocks)\n" % (byte_format(size), disk_size) for x in [1, 5, 10, 50, 100, 500]: print "Estimated storage requirement for %sk metrics: %s" % (x, byte_format(x * 1000 * disk_size)) sys.exit(0) if len(args) < 2: option_parser.print_usage() sys.exit(1) path = args[0] archives = [whisper.parseRetentionDef(retentionDef) for retentionDef in args[1:]] if os.path.exists(path) and options.overwrite: print 'Overwriting existing file: %s' % path os.unlink(path) try: whisper.create(path, archives, xFilesFactor=options.xFilesFactor, aggregationMethod=options.aggregationMethod) except whisper.WhisperException, exc: raise SystemExit('[ERROR] %s' % str(exc)) size = os.stat(path).st_size print 'Created: %s (%d bytes)' % (path,size) whisper-0.9.15/bin/whisper-dump.py0000755000000000000000000000553212626124253017006 0ustar rootroot00000000000000#!/usr/bin/env python import os import mmap import struct import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure 
whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser(usage='''%prog path''') (options, args) = option_parser.parse_args() if len(args) != 1: option_parser.error("require one input file name") else: path = args[0] def mmap_file(filename): fd = os.open(filename, os.O_RDONLY) map = mmap.mmap(fd, os.fstat(fd).st_size, prot=mmap.PROT_READ) os.close(fd) return map def read_header(map): try: (aggregationType,maxRetention,xFilesFactor,archiveCount) = struct.unpack(whisper.metadataFormat,map[:whisper.metadataSize]) except: raise CorruptWhisperFile("Unable to unpack header") archives = [] archiveOffset = whisper.metadataSize for i in xrange(archiveCount): try: (offset, secondsPerPoint, points) = struct.unpack(whisper.archiveInfoFormat, map[archiveOffset:archiveOffset+whisper.archiveInfoSize]) except: raise CorruptWhisperFile("Unable to read archive %d metadata" % i) archiveInfo = { 'offset' : offset, 'secondsPerPoint' : secondsPerPoint, 'points' : points, 'retention' : secondsPerPoint * points, 'size' : points * whisper.pointSize, } archives.append(archiveInfo) archiveOffset += whisper.archiveInfoSize header = { 'aggregationMethod' : whisper.aggregationTypeToMethod.get(aggregationType, 'average'), 'maxRetention' : maxRetention, 'xFilesFactor' : xFilesFactor, 'archives' : archives, } return header def dump_header(header): print 'Meta data:' print ' aggregation method: %s' % header['aggregationMethod'] print ' max retention: %d' % header['maxRetention'] print ' xFilesFactor: %g' % header['xFilesFactor'] print dump_archive_headers(header['archives']) def dump_archive_headers(archives): for i,archive in enumerate(archives): print 'Archive %d info:' % i print ' offset: %d' % archive['offset'] print ' seconds per point: %d' % archive['secondsPerPoint'] print ' points: %d' % archive['points'] print ' retention: %d' % archive['retention'] print ' size: %d' % archive['size'] print def 
dump_archives(archives): for i,archive in enumerate(archives): print 'Archive %d data:' %i offset = archive['offset'] for point in xrange(archive['points']): (timestamp, value) = struct.unpack(whisper.pointFormat, map[offset:offset+whisper.pointSize]) print '%d: %d, %10.35g' % (point, timestamp, value) offset += whisper.pointSize print if not os.path.exists(path): raise SystemExit('[ERROR] File "%s" does not exist!' % path) map = mmap_file(path) header = read_header(map) dump_header(header) dump_archives(header['archives']) whisper-0.9.15/whisper.py0000644000000000000000000007266012626124253015276 0ustar rootroot00000000000000# Copyright 2008 Orbitz WorldWide # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # # This module is an implementation of the Whisper database API # Here is the basic layout of a whisper data file # # File = Header,Data # Header = Metadata,ArchiveInfo+ # Metadata = aggregationType,maxRetention,xFilesFactor,archiveCount # ArchiveInfo = Offset,SecondsPerPoint,Points # Data = Archive+ # Archive = Point+ # Point = timestamp,value import os, struct, time, operator, itertools from errno import ENOSPC try: import fcntl CAN_LOCK = True except ImportError: CAN_LOCK = False try: import ctypes import ctypes.util CAN_FALLOCATE = True except ImportError: CAN_FALLOCATE = False fallocate = None if CAN_FALLOCATE: libc_name = ctypes.util.find_library('c') libc = ctypes.CDLL(libc_name) c_off64_t = ctypes.c_int64 c_off_t = ctypes.c_int try: _fallocate = libc.posix_fallocate64 _fallocate.restype = ctypes.c_int _fallocate.argtypes = [ctypes.c_int, c_off64_t, c_off64_t] except AttributeError, e: try: _fallocate = libc.posix_fallocate _fallocate.restype = ctypes.c_int _fallocate.argtypes = [ctypes.c_int, c_off_t, c_off_t] except AttributeError, e: CAN_FALLOCATE = False if CAN_FALLOCATE: def _py_fallocate(fd, offset, len_): res = _fallocate(fd.fileno(), offset, len_) if res != 0: raise IOError(res, 'fallocate') fallocate = _py_fallocate del libc del libc_name LOCK = False CACHE_HEADERS = False AUTOFLUSH = False __headerCache = {} longFormat = "!L" longSize = struct.calcsize(longFormat) floatFormat = "!f" floatSize = struct.calcsize(floatFormat) valueFormat = "!d" valueSize = struct.calcsize(valueFormat) pointFormat = "!Ld" pointSize = struct.calcsize(pointFormat) metadataFormat = "!2LfL" metadataSize = struct.calcsize(metadataFormat) archiveInfoFormat = "!3L" archiveInfoSize = struct.calcsize(archiveInfoFormat) aggregationTypeToMethod = dict({ 1: 'average', 2: 'sum', 3: 'last', 4: 'max', 5: 'min' }) aggregationMethodToType = dict([[v,k] for k,v in aggregationTypeToMethod.items()]) aggregationMethods = aggregationTypeToMethod.values() debug = startBlock = endBlock = 
lambda *a,**k: None
# NOTE(review): the line above is the tail of a statement that begins before
# this section of the file; it is kept verbatim.

# Number of seconds represented by one of each supported retention unit.
UnitMultipliers = {
    'seconds': 1,
    'minutes': 60,
    'hours': 3600,
    'days': 86400,
    'weeks': 86400 * 7,
    'years': 86400 * 365,
}


def getUnitString(s):
    """Return the full unit name that the abbreviation ``s`` is a prefix of.

    Units are tried in the order seconds, minutes, hours, days, weeks, years
    and the first one that starts with ``s`` wins (so ``'m'`` is minutes).

    Raises ValueError if ``s`` is not a prefix of any known unit.
    """
    for unit in ('seconds', 'minutes', 'hours', 'days', 'weeks', 'years'):
        if unit.startswith(s):
            return unit
    raise ValueError("Invalid unit '%s'" % s)


def parseRetentionDef(retentionDef):
    """Parse a ``precision:retention`` string into ``(precision, points)``.

    Each half is either a bare integer or an integer followed by a lowercase
    unit abbreviation (see ``getUnitString``).  A bare precision is a number
    of seconds; a bare retention is a number of points.  A retention given
    with a unit is a duration and is converted to a point count by dividing
    by the precision.

    Returns a tuple ``(secondsPerPoint, numberOfPoints)`` of integers.
    Raises ValueError if either half is malformed or there is not exactly
    one ``:`` separator.
    """
    import re
    spec_re = re.compile(r'^(\d+)([a-z]+)$')

    (precision, points) = retentionDef.strip().split(':')

    if precision.isdigit():
        precision = int(precision) * UnitMultipliers[getUnitString('s')]
    else:
        match = spec_re.match(precision)
        if not match:
            raise ValueError("Invalid precision specification '%s'" % precision)
        precision = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))]

    if points.isdigit():
        points = int(points)
    else:
        match = spec_re.match(points)
        if not match:
            raise ValueError("Invalid retention specification '%s'" % points)
        # Floor division keeps the point count an integer on every Python.
        points = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))] // precision

    return (precision, points)


class WhisperException(Exception):
    """Base class for whisper exceptions."""


class InvalidConfiguration(WhisperException):
    """Invalid configuration."""


class InvalidAggregationMethod(WhisperException):
    """Invalid aggregation method."""


class InvalidTimeInterval(WhisperException):
    """Invalid time interval."""


class TimestampNotCovered(WhisperException):
    """Timestamp not covered by any archives in this database."""


class CorruptWhisperFile(WhisperException):
    """Raised when a whisper file's header cannot be decoded.

    ``error`` is the human-readable reason and ``path`` the offending file.
    """

    def __init__(self, error, path):
        Exception.__init__(self, error)
        self.error = error
        self.path = path

    def __repr__(self):
        # The previous format string was empty, so formatting two arguments
        # into it raised TypeError whenever repr() was taken.
        return "<CorruptWhisperFile[%s] %s>" % (self.path, self.error)

    def __str__(self):
        return "%s (%s)" % (self.error, self.path)


def enableDebug():
    """Replace the module-level ``open``, ``debug``, ``startBlock`` and
    ``endBlock`` with instrumented versions.

    After this call every file opened through the module's ``open`` counts
    and reports its read/write calls, and ``startBlock``/``endBlock`` report
    the elapsed time of a named block.

    NOTE(review): the instrumented ``open`` subclasses the ``file`` builtin,
    which exists only on Python 2.
    """
    global open, debug, startBlock, endBlock

    class open(file):
        def __init__(self, *args, **kwargs):
            file.__init__(self, *args, **kwargs)
            self.writeCount = 0
            self.readCount = 0

        def write(self, data):
            self.writeCount += 1
            debug('WRITE %d bytes #%d' % (len(data), self.writeCount))
            return file.write(self, data)

        def read(self, bytes):
            self.readCount += 1
            debug('READ %d bytes #%d' % (bytes, self.readCount))
            return file.read(self, bytes)

    def debug(message):
        # Single-argument call form prints identically on Python 2 and 3.
        print('DEBUG :: %s' % message)

    __timingBlocks = {}

    def startBlock(name):
        __timingBlocks[name] = time.time()

    def endBlock(name):
        debug("%s took %.5f seconds" % (name, time.time() - __timingBlocks.pop(name)))


def __readHeader(fh):
    """Read and decode the header of the open whisper file ``fh``.

    Returns a dict with keys ``aggregationMethod``, ``maxRetention``,
    ``xFilesFactor`` and ``archives`` (a list of dicts with ``offset``,
    ``secondsPerPoint``, ``points``, ``retention`` and ``size``, in the order
    they are stored in the file).  The file position is restored afterwards.

    When ``CACHE_HEADERS`` is set the result is cached by ``fh.name`` and the
    *same* dict is handed back on later calls, so callers must not mutate it.

    Raises CorruptWhisperFile if the metadata cannot be unpacked.
    """
    info = __headerCache.get(fh.name)
    if info:
        return info

    originalOffset = fh.tell()
    fh.seek(0)
    packedMetadata = fh.read(metadataSize)

    try:
        (aggregationType, maxRetention, xff, archiveCount) = struct.unpack(metadataFormat, packedMetadata)
    except (struct.error, ValueError, TypeError):
        raise CorruptWhisperFile("Unable to read header", fh.name)

    archives = []
    for i in range(archiveCount):
        packedArchiveInfo = fh.read(archiveInfoSize)
        try:
            (offset, secondsPerPoint, points) = struct.unpack(archiveInfoFormat, packedArchiveInfo)
        except (struct.error, ValueError, TypeError):
            raise CorruptWhisperFile("Unable to read archive%d metadata" % i, fh.name)

        archives.append({
            'offset': offset,
            'secondsPerPoint': secondsPerPoint,
            'points': points,
            'retention': secondsPerPoint * points,
            'size': points * pointSize,
        })

    fh.seek(originalOffset)
    info = {
        # Unknown aggregation type codes fall back to 'average'.
        'aggregationMethod': aggregationTypeToMethod.get(aggregationType, 'average'),
        'maxRetention': maxRetention,
        'xFilesFactor': xff,
        'archives': archives,
    }
    if CACHE_HEADERS:
        __headerCache[fh.name] = info

    return info


def setAggregationMethod(path, aggregationMethod):
    """setAggregationMethod(path,aggregationMethod)

    path is a string
    aggregationMethod specifies the method to use when propagating data
    (see ``whisper.aggregationMethods``)

    Rewrites the aggregation type stored at the start of the file and drops
    any cached header for it.  Returns the previous aggregation method name.

    Raises CorruptWhisperFile if the header cannot be read and
    InvalidAggregationMethod if ``aggregationMethod`` is not recognised.
    """
    with open(path, 'r+b') as fh:
        if LOCK:
            fcntl.flock(fh.fileno(), fcntl.LOCK_EX)

        packedMetadata = fh.read(metadataSize)

        try:
            (aggregationType, maxRetention, xff, archiveCount) = struct.unpack(metadataFormat, packedMetadata)
        except (struct.error, ValueError, TypeError):
            raise CorruptWhisperFile("Unable to read header", fh.name)

        try:
            newAggregationType = struct.pack(longFormat, aggregationMethodToType[aggregationMethod])
        except KeyError:
            raise InvalidAggregationMethod("Unrecognized aggregation method: %s" %
                                           aggregationMethod)

        # The aggregation type is the first field of the header.
        fh.seek(0)
        fh.write(newAggregationType)

        if AUTOFLUSH:
            fh.flush()
            os.fsync(fh.fileno())

        if CACHE_HEADERS and fh.name in __headerCache:
            del __headerCache[fh.name]

    return aggregationTypeToMethod.get(aggregationType, 'average')


def validateArchiveList(archiveList):
    """Validate an archiveList, sorting it in place by precision.

    An ArchiveList must:
    1. Have at least one archive config. Example: (60, 86400)
    2. No archive may be a duplicate of another.
    3. Higher precision archives' precision must evenly divide all lower
       precision archives' precision.
    4. Lower precision archives must cover larger time intervals than higher
       precision archives.
    5. Each archive must have at least enough points to consolidate to the
       next archive

    Returns None when the list is valid; raises InvalidConfiguration
    describing the first violated rule otherwise.
    """
    if not archiveList:
        raise InvalidConfiguration("You must specify at least one archive configuration!")

    archiveList.sort(key=lambda a: a[0])  # sort by precision (secondsPerPoint)

    # Compare each archive with the next lower-precision one.
    for i, (archive, nextArchive) in enumerate(zip(archiveList, archiveList[1:])):
        if not archive[0] < nextArchive[0]:
            raise InvalidConfiguration(
                "A Whisper database may not be configured having "
                "two archives with the same precision (archive%d: %s, archive%d: %s)" %
                (i, archive, i + 1, nextArchive))

        if nextArchive[0] % archive[0] != 0:
            raise InvalidConfiguration(
                "Higher precision archives' precision "
                "must evenly divide all lower precision archives' precision "
                "(archive%d: %s, archive%d: %s)" %
                (i, archive[0], i + 1, nextArchive[0]))

        retention = archive[0] * archive[1]
        nextRetention = nextArchive[0] * nextArchive[1]

        if not nextRetention > retention:
            raise InvalidConfiguration(
                "Lower precision archives must cover "
                "larger time intervals than higher precision archives "
                "(archive%d: %s seconds, archive%d: %s seconds)" %
                (i, retention, i + 1, nextRetention))

        archivePoints = archive[1]
        # Exact: divisibility was checked above.
        pointsPerConsolidation = nextArchive[0] // archive[0]
        if not archivePoints >= pointsPerConsolidation:
            raise InvalidConfiguration(
                "Each archive must have at least enough points "
                "to consolidate to the next archive (archive%d consolidates %d of "
                "archive%d's points but it has only %d total points)" %
                (i + 1, pointsPerConsolidation, i, archivePoints))


def create(path, archiveList, xFilesFactor=None, aggregationMethod=None, sparse=False, useFallocate=False):
    """create(path,archiveList,xFilesFactor=0.5,aggregationMethod='average')

    path is a string
    archiveList is a list of archives, each of which is of the form
    (secondsPerPoint,numberOfPoints); it is validated and sorted in place
    xFilesFactor specifies the fraction of data points in a propagation
    interval that must have known values for a propagation to occur
    aggregationMethod specifies the function to use when propagating data
    (see ``whisper.aggregationMethods``)
    sparse, if true, creates the file by writing only its final byte
    useFallocate, if true and fallocate is available, reserves the data area
    with fallocate instead

    Raises InvalidConfiguration if the archive list is invalid or ``path``
    already exists.  If an IOError occurs while writing, the partially
    written file is removed and the error is re-raised.
    """
    # Set default params
    if xFilesFactor is None:
        xFilesFactor = 0.5
    if aggregationMethod is None:
        aggregationMethod = 'average'

    # Validate archive configurations...
    validateArchiveList(archiveList)

    # Looks good, now we create the file and write the header
    if os.path.exists(path):
        raise InvalidConfiguration("File %s already exists!" % path)

    with open(path, 'wb') as fh:
        try:
            if LOCK:
                fcntl.flock(fh.fileno(), fcntl.LOCK_EX)

            # Unrecognised method names are stored as type code 1.
            packedAggregationType = struct.pack(longFormat, aggregationMethodToType.get(aggregationMethod, 1))
            oldest = max([secondsPerPoint * points for secondsPerPoint, points in archiveList])
            packedMaxRetention = struct.pack(longFormat, oldest)
            packedXff = struct.pack(floatFormat, float(xFilesFactor))
            packedArchiveCount = struct.pack(longFormat, len(archiveList))
            fh.write(packedAggregationType + packedMaxRetention + packedXff + packedArchiveCount)

            headerSize = metadataSize + (archiveInfoSize * len(archiveList))
            archiveOffsetPointer = headerSize

            for secondsPerPoint, points in archiveList:
                archiveInfo = struct.pack(archiveInfoFormat, archiveOffsetPointer, secondsPerPoint, points)
                fh.write(archiveInfo)
                archiveOffsetPointer += (points * pointSize)

            # If configured to use fallocate and capable of fallocate use that,
            # else attempt sparse if configured, or zero pre-allocate if
            # sparse isn't configured.
            if CAN_FALLOCATE and useFallocate:
                remaining = archiveOffsetPointer - headerSize
                fallocate(fh, headerSize, remaining)
            elif sparse:
                fh.seek(archiveOffsetPointer - 1)
                fh.write(b'\x00')
            else:
                remaining = archiveOffsetPointer - headerSize
                chunksize = 16384
                zeroes = b'\x00' * chunksize
                while remaining > chunksize:
                    fh.write(zeroes)
                    remaining -= chunksize
                fh.write(zeroes[:remaining])

            if AUTOFLUSH:
                fh.flush()
                os.fsync(fh.fileno())
            # Explicit close so that an error on close (e.g. ENOSPC) is
            # caught by the handler below.
            fh.close()
        except IOError:
            try:
                # if we got an IOError above, the file is either empty or
                # half created. Better off deleting it to avoid surprises
                # later
                os.unlink(fh.name)
            finally:
                # double close is ok - the first one is needed to catch
                # ENOSPC on close. This one closes the file if we caught an
                # IOError higher up
                fh.close()
            raise


def aggregate(aggregationMethod, knownValues):
    """Collapse ``knownValues`` into one value using ``aggregationMethod``.

    Supported methods are 'average', 'sum', 'last', 'max' and 'min'.
    Raises InvalidAggregationMethod for any other method name.
    """
    if aggregationMethod == 'average':
        return float(sum(knownValues)) / float(len(knownValues))
    elif aggregationMethod == 'sum':
        return float(sum(knownValues))
    elif aggregationMethod == 'last':
        return knownValues[-1]
    elif aggregationMethod == 'max':
        return max(knownValues)
    elif aggregationMethod == 'min':
        return min(knownValues)
    else:
        raise InvalidAggregationMethod("Unrecognized aggregation method %s" %
                                       aggregationMethod)


def __propagate(fh, header, timestamp, higher, lower):
    """Aggregate one interval of the ``higher`` archive into ``lower``.

    Reads the points of ``higher`` that fall in the ``lower`` interval
    containing ``timestamp``, and, if the fraction of them that are known is
    at least the header's xFilesFactor, writes their aggregate to ``lower``.

    Returns True if a value was written, False otherwise.
    """
    aggregationMethod = header['aggregationMethod']
    xff = header['xFilesFactor']

    lowerIntervalStart = timestamp - (timestamp % lower['secondsPerPoint'])

    fh.seek(higher['offset'])
    packedPoint = fh.read(pointSize)
    (higherBaseInterval, higherBaseValue) = struct.unpack(pointFormat, packedPoint)

    if higherBaseInterval == 0:
        higherFirstOffset = higher['offset']
    else:
        timeDistance = lowerIntervalStart - higherBaseInterval
        pointDistance = timeDistance // higher['secondsPerPoint']
        byteDistance = pointDistance * pointSize
        # The modulo wraps the offset around the fixed-size archive.
        higherFirstOffset = higher['offset'] + (byteDistance % higher['size'])

    higherPoints = lower['secondsPerPoint'] // higher['secondsPerPoint']
    higherSize = higherPoints * pointSize
    relativeFirstOffset = higherFirstOffset - higher['offset']
    relativeLastOffset = (relativeFirstOffset + higherSize) % higher['size']
    higherLastOffset = relativeLastOffset + higher['offset']
    fh.seek(higherFirstOffset)

    if higherFirstOffset < higherLastOffset:  # we don't wrap the archive
        seriesString = fh.read(higherLastOffset - higherFirstOffset)
    else:  # We do wrap the archive
        higherEnd = higher['offset'] + higher['size']
        seriesString = fh.read(higherEnd - higherFirstOffset)
        fh.seek(higher['offset'])
        seriesString += fh.read(higherLastOffset - higher['offset'])

    # Now we unpack the series data we just read.  The first character of
    # pointFormat is treated as the byte-order prefix and the rest is
    # repeated once per point.
    byteOrder, pointTypes = pointFormat[0], pointFormat[1:]
    points = len(seriesString) // pointSize
    seriesFormat = byteOrder + (pointTypes * points)
    unpackedSeries = struct.unpack(seriesFormat, seriesString)

    # And finally we construct a list of values.  A slot only counts as known
    # if its stored timestamp is the one expected at that position.
    neighborValues = [None] * points
    currentInterval = lowerIntervalStart
    step = higher['secondsPerPoint']

    for i in range(0, len(unpackedSeries), 2):
        pointTime = unpackedSeries[i]
        if pointTime == currentInterval:
            neighborValues[i // 2] = unpackedSeries[i + 1]
        currentInterval += step

    # Propagate aggregateValue from neighborValues if we have enough known
    # points
    knownValues = [v for v in neighborValues if v is not None]
    if not knownValues:
        return False

    knownPercent = float(len(knownValues)) / float(len(neighborValues))
    if knownPercent < xff:
        return False

    # we have enough data to propagate a value!
    aggregateValue = aggregate(aggregationMethod, knownValues)
    myPackedPoint = struct.pack(pointFormat, lowerIntervalStart, aggregateValue)
    fh.seek(lower['offset'])
    packedPoint = fh.read(pointSize)
    (lowerBaseInterval, lowerBaseValue) = struct.unpack(pointFormat, packedPoint)

    if lowerBaseInterval == 0:  # First propagated update to this lower archive
        fh.seek(lower['offset'])
        fh.write(myPackedPoint)
    else:  # Not our first propagated update to this lower archive
        timeDistance = lowerIntervalStart - lowerBaseInterval
        pointDistance = timeDistance // lower['secondsPerPoint']
        byteDistance = pointDistance * pointSize
        lowerOffset = lower['offset'] + (byteDistance % lower['size'])
        fh.seek(lowerOffset)
        fh.write(myPackedPoint)

    return True


def update(path, value, timestamp=None):
    """update(path,value,timestamp=None)

    path is a string
    value is a float
    timestamp is either an int or float

    Opens ``path`` and stores one point; see ``file_update``.
    """
    value = float(value)
    with open(path, 'r+b') as fh:
        return file_update(fh, value, timestamp)


def file_update(fh, value, timestamp):
    """Store ``value`` at ``timestamp`` in the open whisper file ``fh``.

    The point is written to the first archive (in header order) whose
    retention covers the timestamp and then propagated to the archives after
    it for as long as propagation succeeds.  ``timestamp`` defaults to now.

    Raises TimestampNotCovered if the timestamp is in the future or at least
    maxRetention seconds old.
    """
    if LOCK:
        fcntl.flock(fh.fileno(), fcntl.LOCK_EX)

    header = __readHeader(fh)
    now = int(time.time())
    if timestamp is None:
        timestamp = now

    timestamp = int(timestamp)
    diff = now - timestamp
    if not ((diff < header['maxRetention']) and diff >= 0):
        raise TimestampNotCovered("Timestamp not covered by any archives in "
                                  "this database.")

    # Find the highest-precision archive that covers timestamp
    for i, archive in enumerate(header['archives']):
        if archive['retention'] < diff:
            continue
        # We'll pass on the update to these lower precision archives later
        lowerArchives = header['archives'][i + 1:]
        break

    # First we update the highest-precision archive
    myInterval = timestamp - (timestamp % archive['secondsPerPoint'])
    myPackedPoint = struct.pack(pointFormat, myInterval, value)
    fh.seek(archive['offset'])
    packedPoint = fh.read(pointSize)
    (baseInterval, baseValue) = struct.unpack(pointFormat, packedPoint)

    if baseInterval == 0:  # This file's first update
        fh.seek(archive['offset'])
        fh.write(myPackedPoint)
    else:  # Not our first update
        timeDistance = myInterval - baseInterval
        pointDistance = timeDistance // archive['secondsPerPoint']
        byteDistance = pointDistance * pointSize
        myOffset = archive['offset'] + (byteDistance % archive['size'])
        fh.seek(myOffset)
        fh.write(myPackedPoint)

    # Now we propagate the update to lower-precision archives
    higher = archive
    for lower in lowerArchives:
        if not __propagate(fh, header, myInterval, higher, lower):
            break
        higher = lower

    if AUTOFLUSH:
        fh.flush()
        os.fsync(fh.fileno())


def update_many(path, points):
    """update_many(path,points)

    path is a string
    points is a list of (timestamp,value) points

    Does nothing when ``points`` is empty; otherwise normalises the points
    to (int, float), orders them newest first and hands them to
    ``file_update_many``.
    """
    if not points:
        return
    points = [(int(t), float(v)) for (t, v) in points]
    points.sort(key=lambda p: p[0], reverse=True)  # order points by timestamp, newest first
    with open(path, 'r+b') as fh:
        return file_update_many(fh, points)


def file_update_many(fh, points):
    """Write ``points`` (sorted newest first) to the open whisper file ``fh``.

    Points are grouped by the first archive (in header order) whose retention
    covers their age, and each group is written with
    ``__archive_update_many``.  Points older than every archive's retention
    are dropped.
    """
    if LOCK:
        fcntl.flock(fh.fileno(), fcntl.LOCK_EX)

    header = __readHeader(fh)
    now = int(time.time())
    archives = iter(header['archives'])
    currentArchive = next(archives)
    currentPoints = []

    for point in points:
        age = now - point[0]

        while currentArchive['retention'] < age:  # we can't fit any more points in this archive
            if currentPoints:  # commit all the points we've found that it can fit
                currentPoints.reverse()  # put points in chronological order
                __archive_update_many(fh, header, currentArchive, currentPoints)
                currentPoints = []
            try:
                currentArchive = next(archives)
            except StopIteration:
                currentArchive = None
                break

        if not currentArchive:
            break  # drop remaining points that don't fit in the database

        currentPoints.append(point)

    if currentArchive and currentPoints:  # don't forget to commit after we've checked all the archives
        currentPoints.reverse()
        __archive_update_many(fh, header, currentArchive, currentPoints)

    if AUTOFLUSH:
        fh.flush()
        os.fsync(fh.fileno())


def __archive_update_many(fh, header, archive, points):
    """Write chronologically ordered ``points`` into one ``archive``.

    Timestamps are aligned to the archive's step, runs of consecutive
    intervals are packed and written together (wrapping at the end of the
    archive), and the affected intervals are then propagated to every
    archive in ``header`` with a larger secondsPerPoint.
    """
    step = archive['secondsPerPoint']
    alignedPoints = [(timestamp - (timestamp % step), value)
                     for (timestamp, value) in points]

    # Create a packed string for each contiguous sequence of points
    packedStrings = []
    previousInterval = None
    currentString = b""
    lenAlignedPoints = len(alignedPoints)
    for i, (interval, value) in enumerate(alignedPoints):
        # take last point in run of points with duplicate intervals
        if i + 1 < lenAlignedPoints and interval == alignedPoints[i + 1][0]:
            continue
        if (not previousInterval) or (interval == previousInterval + step):
            currentString += struct.pack(pointFormat, interval, value)
            previousInterval = interval
        else:
            # Gap found: close the current run and start a new one.
            numberOfPoints = len(currentString) // pointSize
            startInterval = previousInterval - (step * (numberOfPoints - 1))
            packedStrings.append((startInterval, currentString))
            currentString = struct.pack(pointFormat, interval, value)
            previousInterval = interval
    if currentString:
        numberOfPoints = len(currentString) // pointSize
        startInterval = previousInterval - (step * (numberOfPoints - 1))
        packedStrings.append((startInterval, currentString))

    # Read base point and determine where our writes will start
    fh.seek(archive['offset'])
    packedBasePoint = fh.read(pointSize)
    (baseInterval, baseValue) = struct.unpack(pointFormat, packedBasePoint)
    if baseInterval == 0:  # This file's first update
        # use our first string as the base, so we start at the start
        baseInterval = packedStrings[0][0]

    # Write all of our packed strings in locations determined by the
    # baseInterval
    for (interval, packedString) in packedStrings:
        timeDistance = interval - baseInterval
        pointDistance = timeDistance // step
        byteDistance = pointDistance * pointSize
        myOffset = archive['offset'] + (byteDistance % archive['size'])
        fh.seek(myOffset)
        archiveEnd = archive['offset'] + archive['size']
        bytesBeyond = (myOffset + len(packedString)) - archiveEnd

        if bytesBeyond > 0:
            fh.write(packedString[:-bytesBeyond])
            assert fh.tell() == archiveEnd, "archiveEnd=%d fh.tell=%d bytesBeyond=%d len(packedString)=%d" % (archiveEnd, fh.tell(), bytesBeyond, len(packedString))
            fh.seek(archive['offset'])
            # safe because it can't exceed the archive (retention checking
            # logic above)
            fh.write(packedString[-bytesBeyond:])
        else:
            fh.write(packedString)

    # Now we propagate the updates to lower-precision archives
    higher = archive
    lowerArchives = [arc for arc in header['archives']
                     if arc['secondsPerPoint'] > archive['secondsPerPoint']]

    for lower in lowerArchives:
        lowerStep = lower['secondsPerPoint']
        uniqueLowerIntervals = set(p[0] - (p[0] % lowerStep) for p in alignedPoints)
        propagateFurther = False
        for interval in uniqueLowerIntervals:
            if __propagate(fh, header, interval, higher, lower):
                propagateFurther = True

        if not propagateFurther:
            break
        higher = lower


def info(path):
    """info(path)

    path is a string

    Returns the header dict of the whisper file (see ``__readHeader``).  The
    dict may be shared with the header cache, so treat it as read-only.
    """
    with open(path, 'rb') as fh:
        return __readHeader(fh)


def fetch(path, fromTime, untilTime=None, now=None):
    """fetch(path,fromTime,untilTime=None)

    path is a string
    fromTime is an epoch time
    untilTime is also an epoch time, but defaults to now.

    Returns a tuple of (timeInfo, valueList)
    where timeInfo is itself a tuple of (fromTime, untilTime, step)

    Returns None if no data can be returned
    """
    with open(path, 'rb') as fh:
        return file_fetch(fh, fromTime, untilTime, now)


def file_fetch(fh, fromTime, untilTime, now=None):
    """Fetch ``fromTime``..``untilTime`` from the open whisper file ``fh``.

    The range is clipped to what the database can hold (between
    ``now - maxRetention`` and ``now``) and read from the first archive, in
    header order, whose retention reaches back to ``fromTime``.

    Returns ``(timeInfo, valueList)`` as described in ``fetch``, or None if
    the range lies entirely in the future or entirely beyond retention.
    Raises InvalidTimeInterval if ``fromTime`` is after ``untilTime``.
    """
    header = __readHeader(fh)
    if now is None:
        now = int(time.time())
    if untilTime is None:
        untilTime = now
    fromTime = int(fromTime)
    untilTime = int(untilTime)

    # Here we try and be flexible and return as much data as we can.
    # If the range of data is from too far in the past or fully in the
    # future, we return nothing
    if fromTime > untilTime:
        raise InvalidTimeInterval("Invalid time interval: from time '%s' is after until time '%s'" % (fromTime, untilTime))

    oldestTime = now - header['maxRetention']
    # Range is in the future
    if fromTime > now:
        return None
    # Range is beyond retention
    if untilTime < oldestTime:
        return None
    # Range requested is partially beyond retention, adjust
    if fromTime < oldestTime:
        fromTime = oldestTime
    # Range is partially in the future, adjust
    if untilTime > now:
        untilTime = now

    diff = now - fromTime
    for archive in header['archives']:
        if archive['retention'] >= diff:
            break

    return __archive_fetch(fh, archive, fromTime, untilTime)


def __archive_fetch(fh, archive, fromTime, untilTime):
    """Fetch ``fromTime``..``untilTime`` from a single ``archive`` of ``fh``.

    No validity checks are made on the requested period here; callers are
    expected to have clipped it already.

    Returns ``(timeInfo, valueList)`` where timeInfo is
    ``(fromInterval, untilInterval, step)`` and valueList holds one entry
    per step, None where no point is stored for that interval.
    """
    step = archive['secondsPerPoint']
    fromInterval = int(fromTime - (fromTime % step)) + step
    untilInterval = int(untilTime - (untilTime % step)) + step

    if fromInterval == untilInterval:
        # Check for zero-length time ranges and always include the next point
        untilInterval = untilInterval + step

    fh.seek(archive['offset'])
    packedPoint = fh.read(pointSize)
    (baseInterval, baseValue) = struct.unpack(pointFormat, packedPoint)

    if baseInterval == 0:
        # Nothing has been written to this archive yet.
        points = (untilInterval - fromInterval) // step
        timeInfo = (fromInterval, untilInterval, step)
        valueList = [None] * points
        return (timeInfo, valueList)

    # Determine fromOffset
    timeDistance = fromInterval - baseInterval
    pointDistance = timeDistance // step
    byteDistance = pointDistance * pointSize
    fromOffset = archive['offset'] + (byteDistance % archive['size'])

    # Determine untilOffset
    timeDistance = untilInterval - baseInterval
    pointDistance = timeDistance // step
    byteDistance = pointDistance * pointSize
    untilOffset = archive['offset'] + (byteDistance % archive['size'])

    # Read all the points in the interval
    fh.seek(fromOffset)
    if fromOffset < untilOffset:  # If we don't wrap around the archive
        seriesString = fh.read(untilOffset - fromOffset)
    else:  # We do wrap around the archive, so we need two reads
        archiveEnd = archive['offset'] + archive['size']
        seriesString = fh.read(archiveEnd - fromOffset)
        fh.seek(archive['offset'])
        seriesString += fh.read(untilOffset - archive['offset'])

    # Now we unpack the series data we just read
    byteOrder, pointTypes = pointFormat[0], pointFormat[1:]
    points = len(seriesString) // pointSize
    seriesFormat = byteOrder + (pointTypes * points)
    unpackedSeries = struct.unpack(seriesFormat, seriesString)

    # And finally we construct a list of values; a slot is filled only when
    # its stored timestamp matches the interval expected at that position.
    valueList = [None] * points  # pre-allocate entire list for speed
    currentInterval = fromInterval

    for i in range(0, len(unpackedSeries), 2):
        pointTime = unpackedSeries[i]
        if pointTime == currentInterval:
            valueList[i // 2] = unpackedSeries[i + 1]
        currentInterval += step

    timeInfo = (fromInterval, untilInterval, step)
    return (timeInfo, valueList)


def merge(path_from, path_to, step=1 << 12):
    """Copy the known points of ``path_from`` into ``path_to``.

    Walks forward from ``now - maxRetention`` of the source file, archive by
    archive (longest retention first), fetching at most ``step`` points at a
    time and writing the non-empty ones with ``update_many``.
    """
    headerFrom = info(path_from)

    # Sort a copy: the header dict may be the cached one, and the update and
    # fetch code depends on its archives staying in file order.
    archives = sorted(headerFrom['archives'],
                      key=operator.itemgetter('retention'), reverse=True)

    # Start from maxRetention of the oldest file, and skip forward at max
    # 'step' points at a time.
    fromTime = int(time.time()) - headerFrom['maxRetention']
    for archive in archives:
        pointsRemaining = archive['points']
        while pointsRemaining:
            pointsToRead = min(step, pointsRemaining)
            pointsRemaining -= pointsToRead
            untilTime = fromTime + (pointsToRead * archive['secondsPerPoint'])
            fetched = fetch(path_from, fromTime, untilTime)
            fromTime = untilTime
            if fetched is None:
                # fetch() has nothing for this window (e.g. it lies in the
                # future); there is nothing to copy.
                continue
            (timeInfo, values) = fetched
            (start, end, archive_step) = timeInfo
            pointsToWrite = [(t, v)
                             for (t, v) in zip(range(start, end, archive_step), values)
                             if v is not None]
            # order points by timestamp, newest first
            pointsToWrite.sort(key=lambda p: p[0], reverse=True)
            update_many(path_to, pointsToWrite)


def diff(path_from, path_to, ignore_empty=False):
    """ Compare two whisper databases. Each file must have the same archive configuration

    Returns the list produced by ``file_diff``.  Both files are closed even
    if the comparison raises.
    """
    with open(path_from, 'rb') as fh_from:
        with open(path_to, 'rb') as fh_to:
            return file_diff(fh_from, fh_to, ignore_empty)


def file_diff(fh_from, fh_to, ignore_empty=False):
    """Compare two open whisper files archive by archive.

    Returns a list of ``(archive_number, diffs, total)`` tuples, ordered by
    increasing retention, where ``diffs`` is a list of
    ``(timestamp, value_from, value_to)`` for points that differ and
    ``total`` is the number of points compared.  With ``ignore_empty`` only
    points present in both files are compared; otherwise points present in
    at least one file are.

    Raises NotImplementedError if the archive configurations differ.
    """
    headerFrom = __readHeader(fh_from)
    headerTo = __readHeader(fh_to)

    if headerFrom['archives'] != headerTo['archives']:
        # TODO: Add specific whisper-resize commands to right size things
        raise NotImplementedError("%s and %s archive configurations are unalike. "
                                  "Resize the input before diffing" % (fh_from.name, fh_to.name))

    # Sort a copy so a cached header is never reordered.
    archives = sorted(headerFrom['archives'], key=operator.itemgetter('retention'))

    archive_diffs = []

    now = int(time.time())
    untilTime = now
    for archive_number, archive in enumerate(archives):
        startTime = now - archive['retention']
        (fromTimeInfo, fromValues) = __archive_fetch(fh_from, archive, startTime, untilTime)
        (toTimeInfo, toValues) = __archive_fetch(fh_to, archive, startTime, untilTime)
        (start, end, archive_step) = (min(fromTimeInfo[0], toTimeInfo[0]),
                                      max(fromTimeInfo[1], toTimeInfo[1]),
                                      min(fromTimeInfo[2], toTimeInfo[2]))

        points = [(s * archive_step + start, fromValues[s], toValues[s])
                  for s in range(0, (end - start) // archive_step)]
        if ignore_empty:
            points = [p for p in points if p[1] is not None and p[2] is not None]
        else:
            points = [p for p in points if p[1] is not None or p[2] is not None]

        diffs = [p for p in points if p[1] != p[2]]

        archive_diffs.append((archive_number, diffs, len(points)))
        # The next (longer) archive is only compared for the older period.
        untilTime = startTime
    return archive_diffs

#!/usr/bin/env python whisper-0.9.15/setup.py0000644000000000000000000000060512626124253014743 0ustar rootroot00000000000000#!/usr/bin/env python import os from glob import glob from distutils.core import setup setup( name='whisper', version='0.9.15', url='http://graphite-project.github.com/', author='Chris Davis', author_email='chrismd@gmail.com', license='Apache Software License 2.0', description='Fixed size round-robin style database', py_modules=['whisper'], scripts=glob('bin/*'), ) whisper-0.9.15/PKG-INFO0000644000000000000000000000042212626126470014326 0ustar rootroot00000000000000Metadata-Version: 1.0 Name: whisper Version: 0.9.15 Summary: Fixed size round-robin style database Home-page: http://graphite-project.github.com/ Author: Chris Davis Author-email: chrismd@gmail.com License: Apache Software License 2.0 Description: UNKNOWN Platform: UNKNOWN