whisper-0.9.12/0000755000076600000240000000000012205170570014546 5ustar mleinartasstaff00000000000000whisper-0.9.12/bin/0000755000076600000240000000000012205170570015316 5ustar mleinartasstaff00000000000000whisper-0.9.12/bin/rrd2whisper.py0000755000076600000240000000773012126623713020161 0ustar mleinartasstaff00000000000000#!/usr/bin/env python import os import sys import time import signal import optparse try: import rrdtool except ImportError, exc: raise SystemExit('[ERROR] Missing dependency: %s' % str(exc)) try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) aggregationMethods = whisper.aggregationMethods # RRD doesn't have a 'sum' or 'total' type aggregationMethods.remove('sum') option_parser = optparse.OptionParser(usage='''%prog rrd_path''') option_parser.add_option( '--xFilesFactor', help="The xFilesFactor to use in the output file. " + "Defaults to the input RRD's xFilesFactor", default=None, type='float') option_parser.add_option( '--aggregationMethod', help="The consolidation function to fetch from on input and " + "aggregationMethod to set on output. One of: %s" % ', '.join(aggregationMethods), default='average', type='string') (options, args) = option_parser.parse_args() if len(args) < 1: option_parser.print_help() sys.exit(1) rrd_path = args[0] try: rrd_info = rrdtool.info(rrd_path) except rrdtool.error, exc: raise SystemExit('[ERROR] %s' % str(exc)) seconds_per_pdp = rrd_info['step'] # Reconcile old vs new python-rrdtool APIs (yuck) # leave consistent 'rras' and 'datasources' lists if 'rra' in rrd_info: rras = rrd_info['rra'] else: rra_indices = [] for key in rrd_info: if key.startswith('rra['): index = int(key.split('[')[1].split(']')[0]) rra_indices.append(index) rra_count = max(rra_indices) + 1 rras = [] for i in range(rra_count): rra_info = {} rra_info['pdp_per_row'] = rrd_info['rra[%d].pdp_per_row' % i] rra_info['rows'] = rrd_info['rra[%d].rows' % i] rra_info['cf'] = rrd_info['rra[%d].cf' % i] rra_info['xff'] = rrd_info['rra[%d].xff' % i] rras.append(rra_info) datasources = [] if 'ds' in rrd_info: datasource_names = rrd_info['ds'].keys() else: ds_keys = [key for key in rrd_info if key.startswith('ds[')] datasources = list(set(key[3:].split(']')[0] for key in ds_keys)) # Grab the archive configuration relevant_rras = [] for rra in rras: if rra['cf'] == options.aggregationMethod.upper(): relevant_rras.append(rra) if not relevant_rras: err = "[ERROR] Unable to find any RRAs with consolidation function: %s" % \ options.aggregationMethod.upper() raise SystemExit(err) archives = [] xFilesFactor = options.xFilesFactor for rra in relevant_rras: precision = rra['pdp_per_row'] * seconds_per_pdp points = rra['rows'] if not xFilesFactor: xFilesFactor = rra['xff'] archives.append((precision, points)) for datasource in datasources: now = int(time.time()) path = rrd_path.replace('.rrd', '_%s.wsp' % datasource) try: whisper.create(path, archives, xFilesFactor=xFilesFactor) except whisper.InvalidConfiguration, e: raise SystemExit('[ERROR] %s' % str(e)) size = os.stat(path).st_size archiveConfig = ','.join(["%d:%d" % ar for ar in archives]) print "Created: %s (%d bytes) with archives: %s" % (path, size, archiveConfig) print "Migrating data" archiveNumber = len(archives) - 1 for precision, points in reversed(archives): retention = precision * points endTime = now - now % precision startTime = endTime - retention (time_info, columns, rows) = rrdtool.fetch( rrd_path, options.aggregationMethod.upper(), '-r', str(precision), '-s', str(startTime), '-e', str(endTime)) column_index = list(columns).index(datasource) rows.pop() # remove the last datapoint because RRD sometimes gives funky values values = [row[column_index] for row in rows] timestamps = list(range(*time_info)) datapoints = zip(timestamps, values) datapoints = filter(lambda p: p[1] is not None, datapoints) print ' migrating %d datapoints from archive %d' % (len(datapoints), archiveNumber) archiveNumber -= 1 whisper.update_many(path, datapoints) whisper-0.9.12/bin/whisper-create.py0000755000076600000240000000317612204726745020636 0ustar mleinartasstaff00000000000000#!/usr/bin/env python import os import sys import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser( usage='''%prog path timePerPoint:timeToStore [timePerPoint:timeToStore]* timePerPoint and timeToStore specify lengths of time, for example: 60:1440 60 seconds per datapoint, 1440 datapoints = 1 day of retention 15m:8 15 minutes per datapoint, 8 datapoints = 2 hours of retention 1h:7d 1 hour per datapoint, 7 days of retention 12h:2y 12 hours per datapoint, 2 years of retention ''') option_parser.add_option('--xFilesFactor', default=0.5, type='float') option_parser.add_option('--aggregationMethod', default='average', type='string', help="Function to use when aggregating values (%s)" % ', '.join(whisper.aggregationMethods)) option_parser.add_option('--overwrite', default=False, action='store_true') (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_usage() sys.exit(1) path = args[0] archives = [whisper.parseRetentionDef(retentionDef) for retentionDef in args[1:]] if os.path.exists(path) and options.overwrite: print 'Overwriting existing file: %s' % path os.unlink(path) try: whisper.create(path, archives, xFilesFactor=options.xFilesFactor, aggregationMethod=options.aggregationMethod) except whisper.WhisperException, exc: raise SystemExit('[ERROR] %s' % str(exc)) size = os.stat(path).st_size print 'Created: %s (%d bytes)' % (path,size) whisper-0.9.12/bin/whisper-dump.py0000755000076600000240000000553212035105226020321 0ustar mleinartasstaff00000000000000#!/usr/bin/env python import os import mmap import struct import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser(usage='''%prog path''') (options, args) = option_parser.parse_args() if len(args) != 1: option_parser.error("require one input file name") else: path = args[0] def mmap_file(filename): fd = os.open(filename, os.O_RDONLY) map = mmap.mmap(fd, os.fstat(fd).st_size, prot=mmap.PROT_READ) os.close(fd) return map def read_header(map): try: (aggregationType,maxRetention,xFilesFactor,archiveCount) = struct.unpack(whisper.metadataFormat,map[:whisper.metadataSize]) except: raise CorruptWhisperFile("Unable to unpack header") archives = [] archiveOffset = whisper.metadataSize for i in xrange(archiveCount): try: (offset, secondsPerPoint, points) = struct.unpack(whisper.archiveInfoFormat, map[archiveOffset:archiveOffset+whisper.archiveInfoSize]) except: raise CorruptWhisperFile("Unable to read archive %d metadata" % i) archiveInfo = { 'offset' : offset, 'secondsPerPoint' : secondsPerPoint, 'points' : points, 'retention' : secondsPerPoint * points, 'size' : points * whisper.pointSize, } archives.append(archiveInfo) archiveOffset += whisper.archiveInfoSize header = { 'aggregationMethod' : whisper.aggregationTypeToMethod.get(aggregationType, 'average'), 'maxRetention' : maxRetention, 'xFilesFactor' : xFilesFactor, 'archives' : archives, } return header def dump_header(header): print 'Meta data:' print ' aggregation method: %s' % header['aggregationMethod'] print ' max retention: %d' % header['maxRetention'] print ' xFilesFactor: %g' % header['xFilesFactor'] print dump_archive_headers(header['archives']) def dump_archive_headers(archives): for i,archive in enumerate(archives): print 'Archive %d info:' % i print ' offset: %d' % archive['offset'] print ' seconds per point: %d' % archive['secondsPerPoint'] print ' points: %d' % archive['points'] print ' retention: %d' % archive['retention'] print ' size: %d' % archive['size'] print def dump_archives(archives): for i,archive in enumerate(archives): print 'Archive %d data:' %i offset = archive['offset'] for point in xrange(archive['points']): (timestamp, value) = struct.unpack(whisper.pointFormat, map[offset:offset+whisper.pointSize]) print '%d: %d, %10.35g' % (point, timestamp, value) offset += whisper.pointSize print if not os.path.exists(path): raise SystemExit('[ERROR] File "%s" does not exist!' % path) map = mmap_file(path) header = read_header(map) dump_header(header) dump_archives(header['archives']) whisper-0.9.12/bin/whisper-fetch.py0000755000076600000240000000337012204726745020460 0ustar mleinartasstaff00000000000000#!/usr/bin/env python import sys import time import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int( time.time() ) yesterday = now - (60 * 60 * 24) option_parser = optparse.OptionParser(usage='''%prog [options] path''') option_parser.add_option('--from', default=yesterday, type='int', dest='_from', help=("Unix epoch time of the beginning of " "your requested interval (default: 24 hours ago)")) option_parser.add_option('--until', default=now, type='int', help="Unix epoch time of the end of your requested interval (default: now)") option_parser.add_option('--json', default=False, action='store_true', help="Output results in JSON form") option_parser.add_option('--pretty', default=False, action='store_true', help="Show human-readable timestamps instead of unix times") (options, args) = option_parser.parse_args() if len(args) != 1: option_parser.print_usage() sys.exit(1) path = args[0] from_time = int( options._from ) until_time = int( options.until ) try: (timeInfo, values) = whisper.fetch(path, from_time, until_time) except whisper.WhisperException, exc: raise SystemExit('[ERROR] %s' % str(exc)) (start,end,step) = timeInfo if options.json: values_json = str(values).replace('None','null') print '''{ "start" : %d, "end" : %d, "step" : %d, "values" : %s }''' % (start,end,step,values_json) sys.exit(0) t = start for value in values: if options.pretty: timestr = time.ctime(t) else: timestr = str(t) if value is None: valuestr = "None" else: valuestr = "%f" % value print "%s\t%s" % (timestr,valuestr) t += step whisper-0.9.12/bin/whisper-info.py0000755000076600000240000000206312204726745020320 0ustar mleinartasstaff00000000000000#!/usr/bin/env python import os import sys import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser(usage='''%prog path [field]''') (options, args) = option_parser.parse_args() if len(args) < 1: option_parser.print_usage() sys.exit(1) path = args[0] if len(args) > 1: field = args[1] else: field = None try: info = whisper.info(path) except whisper.WhisperException, exc: raise SystemExit('[ERROR] %s' % str(exc)) info['fileSize'] = os.stat(path).st_size if field: if field not in info: print 'Unknown field "%s". Valid fields are %s' % (field, ','.join(info)) sys.exit(1) print info[field] sys.exit(0) archives = info.pop('archives') for key,value in info.items(): print '%s: %s' % (key,value) print for i,archive in enumerate(archives): print 'Archive %d' % i for key,value in archive.items(): print '%s: %s' % (key,value) print whisper-0.9.12/bin/whisper-merge.py0000755000076600000240000000124712204726745020467 0ustar mleinartasstaff00000000000000#!/usr/bin/env python import os import sys import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser( usage='''%prog [options] from_path to_path''') (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_usage() sys.exit(1) path_from = args[0] path_to = args[1] for filename in (path_from, path_to): if not os.path.exists(filename): raise SystemExit('[ERROR] File "%s" does not exist!' % filename) whisper.merge(path_from, path_to) whisper-0.9.12/bin/whisper-resize.py0000755000076600000240000001354412204726745020674 0ustar mleinartasstaff00000000000000#!/usr/bin/env python import os import sys import math import time import bisect import signal import optparse import traceback try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int(time.time()) option_parser = optparse.OptionParser( usage='''%prog path timePerPoint:timeToStore [timePerPoint:timeToStore]* timePerPoint and timeToStore specify lengths of time, for example: 60:1440 60 seconds per datapoint, 1440 datapoints = 1 day of retention 15m:8 15 minutes per datapoint, 8 datapoints = 2 hours of retention 1h:7d 1 hour per datapoint, 7 days of retention 12h:2y 12 hours per datapoint, 2 years of retention ''') option_parser.add_option( '--xFilesFactor', default=None, type='float', help="Change the xFilesFactor") option_parser.add_option( '--aggregationMethod', default=None, type='string', help="Change the aggregation function (%s)" % ', '.join(whisper.aggregationMethods)) option_parser.add_option( '--force', default=False, action='store_true', help="Perform a destructive change") option_parser.add_option( '--newfile', default=None, action='store', help="Create a new database file without removing the existing one") option_parser.add_option( '--nobackup', action='store_true', help='Delete the .bak file after successful execution') option_parser.add_option( '--aggregate', action='store_true', help='Try to aggregate the values to fit the new archive better.' ' Note that this will make things slower and use more memory.') (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_usage() sys.exit(1) path = args[0] if not os.path.exists(path): sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % path) option_parser.print_usage() sys.exit(1) info = whisper.info(path) new_archives = [whisper.parseRetentionDef(retentionDef) for retentionDef in args[1:]] old_archives = info['archives'] # sort by precision, lowest to highest old_archives.sort(key=lambda a: a['secondsPerPoint'], reverse=True) if options.xFilesFactor is None: xff = info['xFilesFactor'] else: xff = options.xFilesFactor if options.aggregationMethod is None: aggregationMethod = info['aggregationMethod'] else: aggregationMethod = options.aggregationMethod print 'Retrieving all data from the archives' for archive in old_archives: fromTime = now - archive['retention'] + archive['secondsPerPoint'] untilTime = now timeinfo,values = whisper.fetch(path, fromTime, untilTime) archive['data'] = (timeinfo,values) if options.newfile is None: tmpfile = path + '.tmp' if os.path.exists(tmpfile): print 'Removing previous temporary database file: %s' % tmpfile os.unlink(tmpfile) newfile = tmpfile else: newfile = options.newfile print 'Creating new whisper database: %s' % newfile whisper.create(newfile, new_archives, xFilesFactor=xff, aggregationMethod=aggregationMethod) size = os.stat(newfile).st_size print 'Created: %s (%d bytes)' % (newfile,size) if options.aggregate: # This is where data will be interpolated (best effort) print 'Migrating data with aggregation...' all_datapoints = [] for archive in old_archives: # Loading all datapoints into memory for fast querying timeinfo, values = archive['data'] new_datapoints = zip( range(*timeinfo), values ) if all_datapoints: last_timestamp = all_datapoints[-1][0] slice_end = 0 for i,(timestamp,value) in enumerate(new_datapoints): if timestamp > last_timestamp: slice_end = i break all_datapoints += new_datapoints[i:] else: all_datapoints += new_datapoints oldtimestamps = map( lambda p: p[0], all_datapoints) oldvalues = map( lambda p: p[1], all_datapoints) print "oldtimestamps: %s" % oldtimestamps # Simply cleaning up some used memory del all_datapoints new_info = whisper.info(newfile) new_archives = new_info['archives'] for archive in new_archives: step = archive['secondsPerPoint'] fromTime = now - archive['retention'] + now % step untilTime = now + now % step + step print "(%s,%s,%s)" % (fromTime,untilTime, step) timepoints_to_update = range(fromTime, untilTime, step) print "timepoints_to_update: %s" % timepoints_to_update newdatapoints = [] for tinterval in zip( timepoints_to_update[:-1], timepoints_to_update[1:] ): # TODO: Setting lo= parameter for 'lefti' based on righti from previous # iteration. Obviously, this can only be done if # timepoints_to_update is always updated. Is it? lefti = bisect.bisect_left(oldtimestamps, tinterval[0]) righti = bisect.bisect_left(oldtimestamps, tinterval[1], lo=lefti) newvalues = oldvalues[lefti:righti] if newvalues: non_none = filter( lambda x: x is not None, newvalues) if 1.0*len(non_none)/len(newvalues) >= xff: newdatapoints.append([tinterval[0], whisper.aggregate(aggregationMethod, non_none)]) whisper.update_many(newfile, newdatapoints) else: print 'Migrating data without aggregation...' for archive in old_archives: timeinfo, values = archive['data'] datapoints = zip( range(*timeinfo), values ) datapoints = filter(lambda p: p[1] is not None, datapoints) whisper.update_many(newfile, datapoints) if options.newfile is not None: sys.exit(0) backup = path + '.bak' print 'Renaming old database to: %s' % backup os.rename(path, backup) try: print 'Renaming new database to: %s' % path os.rename(tmpfile, path) except: traceback.print_exc() print '\nOperation failed, restoring backup' os.rename(backup, path) sys.exit(1) if options.nobackup: print "Unlinking backup: %s" % backup os.unlink(backup) whisper-0.9.12/bin/whisper-set-aggregation-method.py0000755000076600000240000000163312204726745023725 0ustar mleinartasstaff00000000000000#!/usr/bin/env python import os import sys import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser( usage='%%prog path <%s>' % '|'.join(whisper.aggregationMethods)) (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_usage() sys.exit(1) path = args[0] aggregationMethod = args[1] try: oldAggregationMethod = whisper.setAggregationMethod(path, aggregationMethod) except IOError, exc: sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % path) option_parser.print_usage() sys.exit(1) except whisper.WhisperException, exc: raise SystemExit('[ERROR] %s' % str(exc)) print 'Updated aggregation method: %s (%s -> %s)' % (path,oldAggregationMethod,aggregationMethod) whisper-0.9.12/bin/whisper-update.py0000755000076600000240000000171612204726745020653 0ustar mleinartasstaff00000000000000#!/usr/bin/env python import sys import time import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int( time.time() ) option_parser = optparse.OptionParser( usage='''%prog [options] path timestamp:value [timestamp:value]*''') (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_usage() sys.exit(1) path = args[0] datapoint_strings = args[1:] datapoint_strings = [point.replace('N:', '%d:' % now) for point in datapoint_strings] datapoints = [tuple(point.split(':')) for point in datapoint_strings] try: if len(datapoints) == 1: timestamp,value = datapoints[0] whisper.update(path, value, timestamp) else: whisper.update_many(path, datapoints) except whisper.WhisperException, exc: raise SystemExit('[ERROR] %s' % str(exc)) whisper-0.9.12/PKG-INFO0000644000076600000240000000042212205170570015641 0ustar mleinartasstaff00000000000000Metadata-Version: 1.0 Name: whisper Version: 0.9.12 Summary: Fixed size round-robin style database Home-page: http://graphite-project.github.com/ Author: Chris Davis Author-email: chrismd@gmail.com License: Apache Software License 2.0 Description: UNKNOWN Platform: UNKNOWN whisper-0.9.12/setup.py0000644000076600000240000000060512205170450016256 0ustar mleinartasstaff00000000000000#!/usr/bin/env python import os from glob import glob from distutils.core import setup setup( name='whisper', version='0.9.12', url='http://graphite-project.github.com/', author='Chris Davis', author_email='chrismd@gmail.com', license='Apache Software License 2.0', description='Fixed size round-robin style database', py_modules=['whisper'], scripts=glob('bin/*'), ) whisper-0.9.12/whisper.py0000644000076600000240000006547112204726745016630 0ustar mleinartasstaff00000000000000# Copyright 2008 Orbitz WorldWide # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # This module is an implementation of the Whisper database API # Here is the basic layout of a whisper data file # # File = Header,Data # Header = Metadata,ArchiveInfo+ # Metadata = aggregationType,maxRetention,xFilesFactor,archiveCount # ArchiveInfo = Offset,SecondsPerPoint,Points # Data = Archive+ # Archive = Point+ # Point = timestamp,value import os, struct, time, operator, itertools try: import fcntl CAN_LOCK = True except ImportError: CAN_LOCK = False try: import ctypes import ctypes.util CAN_FALLOCATE = True except ImportError: CAN_FALLOCATE = False fallocate = None if CAN_FALLOCATE: libc_name = ctypes.util.find_library('c') libc = ctypes.CDLL(libc_name) c_off64_t = ctypes.c_int64 c_off_t = ctypes.c_int try: _fallocate = libc.posix_fallocate64 _fallocate.restype = ctypes.c_int _fallocate.argtypes = [ctypes.c_int, c_off64_t, c_off64_t] except AttributeError, e: try: _fallocate = libc.posix_fallocate _fallocate.restype = ctypes.c_int _fallocate.argtypes = [ctypes.c_int, c_off_t, c_off_t] except AttributeError, e: CAN_FALLOCATE = False if CAN_FALLOCATE: def _py_fallocate(fd, offset, len_): res = _fallocate(fd.fileno(), offset, len_) if res != 0: raise IOError(res, 'fallocate') fallocate = _py_fallocate del libc del libc_name LOCK = False CACHE_HEADERS = False AUTOFLUSH = False __headerCache = {} longFormat = "!L" longSize = struct.calcsize(longFormat) floatFormat = "!f" floatSize = struct.calcsize(floatFormat) valueFormat = "!d" valueSize = struct.calcsize(valueFormat) pointFormat = "!Ld" pointSize = struct.calcsize(pointFormat) metadataFormat = "!2LfL" metadataSize = struct.calcsize(metadataFormat) archiveInfoFormat = "!3L" archiveInfoSize = struct.calcsize(archiveInfoFormat) aggregationTypeToMethod = dict({ 1: 'average', 2: 'sum', 3: 'last', 4: 'max', 5: 'min' }) aggregationMethodToType = dict([[v,k] for k,v in aggregationTypeToMethod.items()]) aggregationMethods = aggregationTypeToMethod.values() debug = startBlock = endBlock = lambda *a,**k: None UnitMultipliers = { 'seconds' : 1, 'minutes' : 60, 'hours' : 3600, 'days' : 86400, 'weeks' : 86400 * 7, 'years' : 86400 * 365 } def getUnitString(s): if 'seconds'.startswith(s): return 'seconds' if 'minutes'.startswith(s): return 'minutes' if 'hours'.startswith(s): return 'hours' if 'days'.startswith(s): return 'days' if 'weeks'.startswith(s): return 'weeks' if 'years'.startswith(s): return 'years' raise ValueError("Invalid unit '%s'" % s) def parseRetentionDef(retentionDef): import re (precision, points) = retentionDef.strip().split(':') if precision.isdigit(): precision = int(precision) * UnitMultipliers[getUnitString('s')] else: precision_re = re.compile(r'^(\d+)([a-z]+)$') match = precision_re.match(precision) if match: precision = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))] else: raise ValueError("Invalid precision specification '%s'" % precision) if points.isdigit(): points = int(points) else: points_re = re.compile(r'^(\d+)([a-z]+)$') match = points_re.match(points) if match: points = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))] / precision else: raise ValueError("Invalid retention specification '%s'" % points) return (precision, points) class WhisperException(Exception): """Base class for whisper exceptions.""" class InvalidConfiguration(WhisperException): """Invalid configuration.""" class InvalidAggregationMethod(WhisperException): """Invalid aggregation method.""" class InvalidTimeInterval(WhisperException): """Invalid time interval.""" class TimestampNotCovered(WhisperException): """Timestamp not covered by any archives in this database.""" class CorruptWhisperFile(WhisperException): def __init__(self, error, path): Exception.__init__(self, error) self.error = error self.path = path def __repr__(self): return "" % (self.path, self.error) def __str__(self): return "%s (%s)" % (self.error, self.path) def enableDebug(): global open, debug, startBlock, endBlock class open(file): def __init__(self,*args,**kwargs): file.__init__(self,*args,**kwargs) self.writeCount = 0 self.readCount = 0 def write(self,data): self.writeCount += 1 debug('WRITE %d bytes #%d' % (len(data),self.writeCount)) return file.write(self,data) def read(self,bytes): self.readCount += 1 debug('READ %d bytes #%d' % (bytes,self.readCount)) return file.read(self,bytes) def debug(message): print 'DEBUG :: %s' % message __timingBlocks = {} def startBlock(name): __timingBlocks[name] = time.time() def endBlock(name): debug("%s took %.5f seconds" % (name,time.time() - __timingBlocks.pop(name))) def __readHeader(fh): info = __headerCache.get(fh.name) if info: return info originalOffset = fh.tell() fh.seek(0) packedMetadata = fh.read(metadataSize) try: (aggregationType,maxRetention,xff,archiveCount) = struct.unpack(metadataFormat,packedMetadata) except: raise CorruptWhisperFile("Unable to read header", fh.name) archives = [] for i in xrange(archiveCount): packedArchiveInfo = fh.read(archiveInfoSize) try: (offset,secondsPerPoint,points) = struct.unpack(archiveInfoFormat,packedArchiveInfo) except: raise CorruptWhisperFile("Unable to read archive%d metadata" % i, fh.name) archiveInfo = { 'offset' : offset, 'secondsPerPoint' : secondsPerPoint, 'points' : points, 'retention' : secondsPerPoint * points, 'size' : points * pointSize, } archives.append(archiveInfo) fh.seek(originalOffset) info = { 'aggregationMethod' : aggregationTypeToMethod.get(aggregationType, 'average'), 'maxRetention' : maxRetention, 'xFilesFactor' : xff, 'archives' : archives, } if CACHE_HEADERS: __headerCache[fh.name] = info return info def setAggregationMethod(path, aggregationMethod): """setAggregationMethod(path,aggregationMethod) path is a string aggregationMethod specifies the method to use when propogating data (see ``whisper.aggregationMethods``) """ fh = open(path,'r+b') if LOCK: fcntl.flock( fh.fileno(), fcntl.LOCK_EX ) packedMetadata = fh.read(metadataSize) try: (aggregationType,maxRetention,xff,archiveCount) = struct.unpack(metadataFormat,packedMetadata) except: raise CorruptWhisperFile("Unable to read header", fh.name) try: newAggregationType = struct.pack( longFormat, aggregationMethodToType[aggregationMethod] ) except KeyError: raise InvalidAggregationMethod("Unrecognized aggregation method: %s" % aggregationMethod) fh.seek(0) fh.write(newAggregationType) if AUTOFLUSH: fh.flush() os.fsync(fh.fileno()) if CACHE_HEADERS and fh.name in __headerCache: del __headerCache[fh.name] fh.close() return aggregationTypeToMethod.get(aggregationType, 'average') def validateArchiveList(archiveList): """ Validates an archiveList. An ArchiveList must: 1. Have at least one archive config. Example: (60, 86400) 2. No archive may be a duplicate of another. 3. Higher precision archives' precision must evenly divide all lower precision archives' precision. 4. Lower precision archives must cover larger time intervals than higher precision archives. 5. Each archive must have at least enough points to consolidate to the next archive Returns True or False """ if not archiveList: raise InvalidConfiguration("You must specify at least one archive configuration!") archiveList.sort(key=lambda a: a[0]) #sort by precision (secondsPerPoint) for i,archive in enumerate(archiveList): if i == len(archiveList) - 1: break nextArchive = archiveList[i+1] if not archive[0] < nextArchive[0]: raise InvalidConfiguration("A Whisper database may not configured having" "two archives with the same precision (archive%d: %s, archive%d: %s)" % (i, archive, i + 1, nextArchive)) if nextArchive[0] % archive[0] != 0: raise InvalidConfiguration("Higher precision archives' precision " "must evenly divide all lower precision archives' precision " "(archive%d: %s, archive%d: %s)" % (i, archive[0], i + 1, nextArchive[0])) retention = archive[0] * archive[1] nextRetention = nextArchive[0] * nextArchive[1] if not nextRetention > retention: raise InvalidConfiguration("Lower precision archives must cover " "larger time intervals than higher precision archives " "(archive%d: %s seconds, archive%d: %s seconds)" % (i, retention, i + 1, nextRetention)) archivePoints = archive[1] pointsPerConsolidation = nextArchive[0] / archive[0] if not archivePoints >= pointsPerConsolidation: raise InvalidConfiguration("Each archive must have at least enough points " "to consolidate to the next archive (archive%d consolidates %d of " "archive%d's points but it has only %d total points)" % (i + 1, pointsPerConsolidation, i, archivePoints)) def create(path,archiveList,xFilesFactor=None,aggregationMethod=None,sparse=False,useFallocate=False): """create(path,archiveList,xFilesFactor=0.5,aggregationMethod='average') path is a string archiveList is a list of archives, each of which is of the form (secondsPerPoint,numberOfPoints) xFilesFactor specifies the fraction of data points in a propagation interval that must have known values for a propagation to occur aggregationMethod specifies the function to use when propogating data (see ``whisper.aggregationMethods``) """ # Set default params if xFilesFactor is None: xFilesFactor = 0.5 if aggregationMethod is None: aggregationMethod = 'average' #Validate archive configurations... validateArchiveList(archiveList) #Looks good, now we create the file and write the header if os.path.exists(path): raise InvalidConfiguration("File %s already exists!" % path) fh = open(path,'wb') if LOCK: fcntl.flock( fh.fileno(), fcntl.LOCK_EX ) aggregationType = struct.pack( longFormat, aggregationMethodToType.get(aggregationMethod, 1) ) oldest = max([secondsPerPoint * points for secondsPerPoint,points in archiveList]) maxRetention = struct.pack( longFormat, oldest ) xFilesFactor = struct.pack( floatFormat, float(xFilesFactor) ) archiveCount = struct.pack(longFormat, len(archiveList)) packedMetadata = aggregationType + maxRetention + xFilesFactor + archiveCount fh.write(packedMetadata) headerSize = metadataSize + (archiveInfoSize * len(archiveList)) archiveOffsetPointer = headerSize for secondsPerPoint,points in archiveList: archiveInfo = struct.pack(archiveInfoFormat, archiveOffsetPointer, secondsPerPoint, points) fh.write(archiveInfo) archiveOffsetPointer += (points * pointSize) #If configured to use fallocate and capable of fallocate use that, else #attempt sparse if configure or zero pre-allocate if sparse isn't configured. if CAN_FALLOCATE and useFallocate: remaining = archiveOffsetPointer - headerSize fallocate(fh, headerSize, remaining) elif sparse: fh.seek(archiveOffsetPointer - 1) fh.write('\x00') else: remaining = archiveOffsetPointer - headerSize chunksize = 16384 zeroes = '\x00' * chunksize while remaining > chunksize: fh.write(zeroes) remaining -= chunksize fh.write(zeroes[:remaining]) if AUTOFLUSH: fh.flush() os.fsync(fh.fileno()) fh.close() def aggregate(aggregationMethod, knownValues): if aggregationMethod == 'average': return float(sum(knownValues)) / float(len(knownValues)) elif aggregationMethod == 'sum': return float(sum(knownValues)) elif aggregationMethod == 'last': return knownValues[len(knownValues)-1] elif aggregationMethod == 'max': return max(knownValues) elif aggregationMethod == 'min': return min(knownValues) else: raise InvalidAggregationMethod("Unrecognized aggregation method %s" % aggregationMethod) def __propagate(fh,header,timestamp,higher,lower): aggregationMethod = header['aggregationMethod'] xff = header['xFilesFactor'] lowerIntervalStart = timestamp - (timestamp % lower['secondsPerPoint']) lowerIntervalEnd = lowerIntervalStart + lower['secondsPerPoint'] fh.seek(higher['offset']) packedPoint = fh.read(pointSize) (higherBaseInterval,higherBaseValue) = struct.unpack(pointFormat,packedPoint) if higherBaseInterval == 0: higherFirstOffset = higher['offset'] else: timeDistance = lowerIntervalStart - higherBaseInterval pointDistance = timeDistance / higher['secondsPerPoint'] byteDistance = pointDistance * pointSize higherFirstOffset = higher['offset'] + (byteDistance % higher['size']) higherPoints = lower['secondsPerPoint'] / higher['secondsPerPoint'] higherSize = higherPoints * pointSize relativeFirstOffset = higherFirstOffset - higher['offset'] relativeLastOffset = (relativeFirstOffset + higherSize) % higher['size'] higherLastOffset = relativeLastOffset + higher['offset'] fh.seek(higherFirstOffset) if higherFirstOffset < higherLastOffset: #we don't wrap the archive seriesString = fh.read(higherLastOffset - higherFirstOffset) else: #We do wrap the archive higherEnd = higher['offset'] + higher['size'] seriesString = fh.read(higherEnd - higherFirstOffset) fh.seek(higher['offset']) seriesString += fh.read(higherLastOffset - higher['offset']) #Now we unpack the series data we just read byteOrder,pointTypes = pointFormat[0],pointFormat[1:] points = len(seriesString) / pointSize seriesFormat = byteOrder + (pointTypes * points) unpackedSeries = struct.unpack(seriesFormat, seriesString) #And finally we construct a list of values neighborValues = [None] * points currentInterval = lowerIntervalStart step = higher['secondsPerPoint'] for i in xrange(0,len(unpackedSeries),2): pointTime = unpackedSeries[i] if pointTime == currentInterval: neighborValues[i/2] = unpackedSeries[i+1] currentInterval += step #Propagate aggregateValue to propagate from neighborValues if we have enough known points knownValues = [v for v in neighborValues if v is not None] if not knownValues: return False knownPercent = float(len(knownValues)) / float(len(neighborValues)) if knownPercent >= xff: #we have enough data to propagate a value! aggregateValue = aggregate(aggregationMethod, knownValues) myPackedPoint = struct.pack(pointFormat,lowerIntervalStart,aggregateValue) fh.seek(lower['offset']) packedPoint = fh.read(pointSize) (lowerBaseInterval,lowerBaseValue) = struct.unpack(pointFormat,packedPoint) if lowerBaseInterval == 0: #First propagated update to this lower archive fh.seek(lower['offset']) fh.write(myPackedPoint) else: #Not our first propagated update to this lower archive timeDistance = lowerIntervalStart - lowerBaseInterval pointDistance = timeDistance / lower['secondsPerPoint'] byteDistance = pointDistance * pointSize lowerOffset = lower['offset'] + (byteDistance % lower['size']) fh.seek(lowerOffset) fh.write(myPackedPoint) return True else: return False def update(path,value,timestamp=None): """update(path,value,timestamp=None) path is a string value is a float timestamp is either an int or float """ value = float(value) fh = open(path,'r+b') return file_update(fh, value, timestamp) def file_update(fh, value, timestamp): if LOCK: fcntl.flock( fh.fileno(), fcntl.LOCK_EX ) header = __readHeader(fh) now = int( time.time() ) if timestamp is None: timestamp = now timestamp = int(timestamp) diff = now - timestamp if not ((diff < header['maxRetention']) and diff >= 0): raise TimestampNotCovered("Timestamp not covered by any archives in " "this database.") for i,archive in enumerate(header['archives']): #Find the highest-precision archive that covers timestamp if archive['retention'] < diff: continue lowerArchives = header['archives'][i+1:] #We'll pass on the update to these lower precision archives later break #First we update the highest-precision archive myInterval = timestamp - (timestamp % archive['secondsPerPoint']) myPackedPoint = struct.pack(pointFormat,myInterval,value) fh.seek(archive['offset']) packedPoint = fh.read(pointSize) (baseInterval,baseValue) = struct.unpack(pointFormat,packedPoint) if baseInterval == 0: #This file's first update fh.seek(archive['offset']) fh.write(myPackedPoint) baseInterval,baseValue = myInterval,value else: #Not our first update timeDistance = myInterval - baseInterval pointDistance = timeDistance / archive['secondsPerPoint'] byteDistance = pointDistance * pointSize myOffset = archive['offset'] + (byteDistance % archive['size']) fh.seek(myOffset) fh.write(myPackedPoint) #Now we propagate the update to lower-precision archives higher = archive for lower in lowerArchives: if not __propagate(fh, header, myInterval, higher, lower): break higher = lower if AUTOFLUSH: fh.flush() os.fsync(fh.fileno()) fh.close() def update_many(path,points): """update_many(path,points) path is a string points is a list of (timestamp,value) points """ if not points: return points = [ (int(t),float(v)) for (t,v) in points] points.sort(key=lambda p: p[0],reverse=True) #order points by timestamp, newest first fh = open(path,'r+b') return file_update_many(fh, points) def file_update_many(fh, points): if LOCK: fcntl.flock( fh.fileno(), fcntl.LOCK_EX ) header = __readHeader(fh) now = int( time.time() ) archives = iter( header['archives'] ) currentArchive = archives.next() currentPoints = [] for point in points: age = now - point[0] while currentArchive['retention'] < age: #we can't fit any more points in this archive if currentPoints: #commit all the points we've found that it can fit currentPoints.reverse() #put points in chronological order __archive_update_many(fh,header,currentArchive,currentPoints) currentPoints = [] try: currentArchive = archives.next() except StopIteration: currentArchive = None break if not currentArchive: break #drop remaining points that don't fit in the database currentPoints.append(point) if currentArchive and currentPoints: #don't forget to commit after we've checked all the archives currentPoints.reverse() __archive_update_many(fh,header,currentArchive,currentPoints) if AUTOFLUSH: fh.flush() os.fsync(fh.fileno()) fh.close() def __archive_update_many(fh,header,archive,points): step = archive['secondsPerPoint'] alignedPoints = [ (timestamp - (timestamp % step), value) for (timestamp,value) in points ] alignedPoints = dict(alignedPoints).items() # Take the last val of duplicates #Create a packed string for each contiguous sequence of points packedStrings = [] previousInterval = None currentString = "" for (interval,value) in alignedPoints: if (not previousInterval) or (interval == previousInterval + step): currentString += struct.pack(pointFormat,interval,value) previousInterval = interval else: numberOfPoints = len(currentString) / pointSize startInterval = previousInterval - (step * (numberOfPoints-1)) packedStrings.append( (startInterval,currentString) ) currentString = struct.pack(pointFormat,interval,value) previousInterval = interval if currentString: numberOfPoints = len(currentString) / pointSize startInterval = previousInterval - (step * (numberOfPoints-1)) packedStrings.append( (startInterval,currentString) ) #Read base point and determine where our writes will start fh.seek(archive['offset']) packedBasePoint = fh.read(pointSize) (baseInterval,baseValue) = struct.unpack(pointFormat,packedBasePoint) if baseInterval == 0: #This file's first update baseInterval = packedStrings[0][0] #use our first string as the base, so we start at the start #Write all of our packed strings in locations determined by the baseInterval for (interval,packedString) in packedStrings: timeDistance = interval - baseInterval pointDistance = timeDistance / step byteDistance = pointDistance * pointSize myOffset = archive['offset'] + (byteDistance % archive['size']) fh.seek(myOffset) archiveEnd = archive['offset'] + archive['size'] bytesBeyond = (myOffset + len(packedString)) - archiveEnd if bytesBeyond > 0: fh.write( packedString[:-bytesBeyond] ) assert fh.tell() == archiveEnd, "archiveEnd=%d fh.tell=%d bytesBeyond=%d len(packedString)=%d" % (archiveEnd,fh.tell(),bytesBeyond,len(packedString)) fh.seek( archive['offset'] ) fh.write( packedString[-bytesBeyond:] ) #safe because it can't exceed the archive (retention checking logic above) else: fh.write(packedString) #Now we propagate the updates to lower-precision archives higher = archive lowerArchives = [arc for arc in header['archives'] if arc['secondsPerPoint'] > archive['secondsPerPoint']] for lower in lowerArchives: fit = lambda i: i - (i % lower['secondsPerPoint']) lowerIntervals = [fit(p[0]) for p in alignedPoints] uniqueLowerIntervals = set(lowerIntervals) propagateFurther = False for interval in uniqueLowerIntervals: if __propagate(fh, header, interval, higher, lower): propagateFurther = True if not propagateFurther: break higher = lower def info(path): """info(path) path is a string """ fh = open(path,'rb') info = __readHeader(fh) fh.close() return info def fetch(path,fromTime,untilTime=None): """fetch(path,fromTime,untilTime=None) path is a string fromTime is an epoch time untilTime is also an epoch time, but defaults to now. Returns a tuple of (timeInfo, valueList) where timeInfo is itself a tuple of (fromTime, untilTime, step) Returns None if no data can be returned """ fh = open(path,'rb') return file_fetch(fh, fromTime, untilTime) def file_fetch(fh, fromTime, untilTime): header = __readHeader(fh) now = int( time.time() ) if untilTime is None: untilTime = now fromTime = int(fromTime) untilTime = int(untilTime) # Here we try and be flexible and return as much data as we can. # If the range of data is from too far in the past or fully in the future, we # return nothing if (fromTime > untilTime): raise InvalidTimeInterval("Invalid time interval: from time '%s' is after until time '%s'" % (fromTime, untilTime)) oldestTime = now - header['maxRetention'] # Range is in the future if fromTime > now: return None # Range is beyond retention if untilTime < oldestTime: return None # Range requested is partially beyond retention, adjust if fromTime < oldestTime: fromTime = oldestTime # Range is partially in the future, adjust if untilTime > now: untilTime = now diff = now - fromTime for archive in header['archives']: if archive['retention'] >= diff: break fromInterval = int( fromTime - (fromTime % archive['secondsPerPoint']) ) + archive['secondsPerPoint'] untilInterval = int( untilTime - (untilTime % archive['secondsPerPoint']) ) + archive['secondsPerPoint'] fh.seek(archive['offset']) packedPoint = fh.read(pointSize) (baseInterval,baseValue) = struct.unpack(pointFormat,packedPoint) if baseInterval == 0: step = archive['secondsPerPoint'] points = (untilInterval - fromInterval) / step timeInfo = (fromInterval,untilInterval,step) valueList = [None] * points return (timeInfo,valueList) #Determine fromOffset timeDistance = fromInterval - baseInterval pointDistance = timeDistance / archive['secondsPerPoint'] byteDistance = pointDistance * pointSize fromOffset = archive['offset'] + (byteDistance % archive['size']) #Determine untilOffset timeDistance = untilInterval - baseInterval pointDistance = timeDistance / archive['secondsPerPoint'] byteDistance = pointDistance * pointSize untilOffset = archive['offset'] + (byteDistance % archive['size']) #Read all the points in the interval fh.seek(fromOffset) if fromOffset < untilOffset: #If we don't wrap around the archive seriesString = fh.read(untilOffset - fromOffset) else: #We do wrap around the archive, so we need two reads archiveEnd = archive['offset'] + archive['size'] seriesString = fh.read(archiveEnd - fromOffset) fh.seek(archive['offset']) seriesString += fh.read(untilOffset - archive['offset']) #Now we unpack the series data we just read (anything faster than unpack?) byteOrder,pointTypes = pointFormat[0],pointFormat[1:] points = len(seriesString) / pointSize seriesFormat = byteOrder + (pointTypes * points) unpackedSeries = struct.unpack(seriesFormat, seriesString) #And finally we construct a list of values (optimize this!) valueList = [None] * points #pre-allocate entire list for speed currentInterval = fromInterval step = archive['secondsPerPoint'] for i in xrange(0,len(unpackedSeries),2): pointTime = unpackedSeries[i] if pointTime == currentInterval: pointValue = unpackedSeries[i+1] valueList[i/2] = pointValue #in-place reassignment is faster than append() currentInterval += step fh.close() timeInfo = (fromInterval,untilInterval,step) return (timeInfo,valueList) def merge(path_from, path_to, step=1<<12): headerFrom = info(path_from) archives = headerFrom['archives'] archives.sort(key=operator.itemgetter('retention'), reverse=True) # Start from maxRetention of the oldest file, and skip forward at max 'step' # points at a time. fromTime = int(time.time()) - headerFrom['maxRetention'] for archive in archives: pointsRemaining = archive['points'] while pointsRemaining: pointsToRead = step if pointsRemaining < step: pointsToRead = pointsRemaining pointsRemaining -= pointsToRead untilTime = fromTime + (pointsToRead * archive['secondsPerPoint']) (timeInfo, values) = fetch(path_from, fromTime, untilTime) (start, end, archive_step) = timeInfo pointsToWrite = list(itertools.ifilter( lambda points: points[1] is not None, itertools.izip(xrange(start, end, archive_step), values))) pointsToWrite.sort(key=lambda p: p[0],reverse=True) #order points by timestamp, newest first update_many(path_to, pointsToWrite) fromTime = untilTime #!/usr/bin/env python