whisper-1.0.2/0000755000000000000000000000000013131245044013126 5ustar rootroot00000000000000whisper-1.0.2/bin/0000755000000000000000000000000013131245044013676 5ustar rootroot00000000000000whisper-1.0.2/bin/whisper-set-aggregation-method.py0000755000000000000000000000204213131244455022273 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE try: signal.signal(signal.SIGPIPE, signal.SIG_DFL) except AttributeError: #windows? pass option_parser = optparse.OptionParser( usage='%%prog path <%s> [xFilesFactor]' % '|'.join(whisper.aggregationMethods)) (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_help() sys.exit(1) path = args[0] aggregationMethod = args[1] xFilesFactor = None if len(args) == 3: xFilesFactor = args[2] try: oldAggregationMethod = whisper.setAggregationMethod(path, aggregationMethod, xFilesFactor) except IOError: sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % path) option_parser.print_help() sys.exit(1) except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) print('Updated aggregation method: %s (%s -> %s)' % (path,oldAggregationMethod,aggregationMethod)) whisper-1.0.2/bin/whisper-update.py0000755000000000000000000000171713131244455017227 0ustar rootroot00000000000000#!/usr/bin/env python import sys import time import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int( time.time() ) option_parser = optparse.OptionParser( usage='''%prog [options] path timestamp:value [timestamp:value]*''') (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_help() sys.exit(1) path = args[0] datapoint_strings = args[1:] datapoint_strings = [point.replace('N:', '%d:' % now) for point in datapoint_strings] datapoints = [tuple(point.split(':')) for point in datapoint_strings] try: if len(datapoints) == 1: timestamp,value = datapoints[0] whisper.update(path, value, timestamp) else: whisper.update_many(path, datapoints) except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) whisper-1.0.2/bin/whisper-resize.py0000755000000000000000000001357213131244455017250 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import math import time import bisect import signal import optparse import traceback try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int(time.time()) option_parser = optparse.OptionParser( usage='''%prog path timePerPoint:timeToStore [timePerPoint:timeToStore]* timePerPoint and timeToStore specify lengths of time, for example: 60:1440 60 seconds per datapoint, 1440 datapoints = 1 day of retention 15m:8 15 minutes per datapoint, 8 datapoints = 2 hours of retention 1h:7d 1 hour per datapoint, 7 days of retention 12h:2y 12 hours per datapoint, 2 years of retention ''') option_parser.add_option( '--xFilesFactor', default=None, type='float', help="Change the xFilesFactor") option_parser.add_option( '--aggregationMethod', default=None, type='string', help="Change the aggregation function (%s)" % ', '.join(whisper.aggregationMethods)) option_parser.add_option( '--force', default=False, 
action='store_true', help="Perform a destructive change") option_parser.add_option( '--newfile', default=None, action='store', help="Create a new database file without removing the existing one") option_parser.add_option( '--nobackup', action='store_true', help='Delete the .bak file after successful execution') option_parser.add_option( '--aggregate', action='store_true', help='Try to aggregate the values to fit the new archive better.' ' Note that this will make things slower and use more memory.') (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_help() sys.exit(1) path = args[0] if not os.path.exists(path): sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % path) option_parser.print_help() sys.exit(1) info = whisper.info(path) new_archives = [whisper.parseRetentionDef(retentionDef) for retentionDef in args[1:]] old_archives = info['archives'] # sort by precision, lowest to highest old_archives.sort(key=lambda a: a['secondsPerPoint'], reverse=True) if options.xFilesFactor is None: xff = info['xFilesFactor'] else: xff = options.xFilesFactor if options.aggregationMethod is None: aggregationMethod = info['aggregationMethod'] else: aggregationMethod = options.aggregationMethod print('Retrieving all data from the archives') for archive in old_archives: fromTime = now - archive['retention'] + archive['secondsPerPoint'] untilTime = now timeinfo,values = whisper.fetch(path, fromTime, untilTime) archive['data'] = (timeinfo,values) if options.newfile is None: tmpfile = path + '.tmp' if os.path.exists(tmpfile): print('Removing previous temporary database file: %s' % tmpfile) os.unlink(tmpfile) newfile = tmpfile else: newfile = options.newfile print('Creating new whisper database: %s' % newfile) whisper.create(newfile, new_archives, xFilesFactor=xff, aggregationMethod=aggregationMethod) size = os.stat(newfile).st_size print('Created: %s (%d bytes)' % (newfile,size)) if options.aggregate: # This is where data will be interpolated (best effort) print('Migrating data with aggregation...') all_datapoints = [] for archive in old_archives: # Loading all datapoints into memory for fast querying timeinfo, values = archive['data'] new_datapoints = zip( range(*timeinfo), values ) if all_datapoints: last_timestamp = all_datapoints[-1][0] slice_end = 0 for i,(timestamp,value) in enumerate(new_datapoints): if timestamp > last_timestamp: slice_end = i break all_datapoints += new_datapoints[i:] else: all_datapoints += new_datapoints oldtimestamps = map( lambda p: p[0], all_datapoints) oldvalues = map( lambda p: p[1], all_datapoints) print("oldtimestamps: %s" % oldtimestamps) # Simply cleaning up some used memory del all_datapoints new_info = whisper.info(newfile) new_archives = new_info['archives'] for archive in new_archives: step = archive['secondsPerPoint'] fromTime = now - archive['retention'] + now % step untilTime = now + now % step + step print("(%s,%s,%s)" % (fromTime,untilTime, step)) timepoints_to_update = range(fromTime, untilTime, step) print("timepoints_to_update: %s" % timepoints_to_update) newdatapoints = [] for tinterval in zip( timepoints_to_update[:-1], timepoints_to_update[1:] ): # TODO: Setting lo= parameter for 'lefti' based on righti from previous # iteration. Obviously, this can only be done if # timepoints_to_update is always updated. Is it? 
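# timepoints_to_update comes from range(fromTime, untilTime, step), so the
# intervals below are visited in strictly increasing order; the half-open slice
# [lefti:righti) picks out the old datapoints whose timestamps fall inside each
# interval.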
lefti = bisect.bisect_left(oldtimestamps, tinterval[0]) righti = bisect.bisect_left(oldtimestamps, tinterval[1], lo=lefti) newvalues = oldvalues[lefti:righti] if newvalues: non_none = filter( lambda x: x is not None, newvalues) if 1.0*len(non_none)/len(newvalues) >= xff: newdatapoints.append([tinterval[0], whisper.aggregate(aggregationMethod, non_none, newvalues)]) whisper.update_many(newfile, newdatapoints) else: print('Migrating data without aggregation...') for archive in old_archives: timeinfo, values = archive['data'] datapoints = zip( range(*timeinfo), values ) datapoints = filter(lambda p: p[1] is not None, datapoints) whisper.update_many(newfile, datapoints) if options.newfile is not None: sys.exit(0) backup = path + '.bak' print('Renaming old database to: %s' % backup) os.rename(path, backup) try: print('Renaming new database to: %s' % path) os.rename(tmpfile, path) except: traceback.print_exc() print('\nOperation failed, restoring backup') os.rename(backup, path) sys.exit(1) if options.nobackup: print("Unlinking backup: %s" % backup) os.unlink(backup) whisper-1.0.2/bin/find-corrupt-whisper-files.py0000644000000000000000000000344613131244452021454 0ustar rootroot00000000000000#!/usr/bin/env python # encoding: utf-8 """Find and (optionally) delete corrupt Whisper data files""" from __future__ import absolute_import, print_function, unicode_literals import argparse import os import sys import whisper def walk_dir(base_dir, delete_corrupt=False, verbose=False): for dirpath, dirnames, filenames in os.walk(base_dir): if verbose: print("Scanning %s…" % dirpath) whisper_files = (os.path.join(dirpath, i) for i in filenames if i.endswith('.wsp')) for f in whisper_files: try: info = whisper.info(f) except whisper.CorruptWhisperFile: if delete_corrupt: print('Deleting corrupt Whisper file: %s' % f, file=sys.stderr) os.unlink(f) else: print('Corrupt Whisper file: %s' % f, file=sys.stderr) continue if verbose: print('%s: %d points' % (f, sum(i['points'] for i in info.get('archives', {})))) if __name__ == "__main__": parser = argparse.ArgumentParser(description=__doc__.strip()) parser.add_argument('--delete-corrupt', default=False, action='store_true', help='Delete reported files') parser.add_argument('--verbose', default=False, action='store_true', help='Display progress info') parser.add_argument('directories', type=str, nargs='+', metavar='WHISPER_DIR', help='Directory containing Whisper files') args = parser.parse_args() for d in args.directories: d = os.path.realpath(d) if not os.path.isdir(d): parser.error("%d is not a directory!") walk_dir(d, delete_corrupt=args.delete_corrupt, verbose=args.verbose) whisper-1.0.2/bin/whisper-merge.py0000755000000000000000000000170413131244455017040 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser( usage='''%prog [options] from_path to_path''') option_parser.add_option('--from', default=None, type='int', dest='_from', help=("Begining of interval, unix timestamp (default: epoch)")) option_parser.add_option('--until', default=None, type='int', help="End of interval, unix timestamp (default: now)") (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_help() sys.exit(1) path_from = args[0] path_to = args[1] for filename in (path_from, path_to): if not 
os.path.exists(filename): raise SystemExit('[ERROR] File "%s" does not exist!' % filename) whisper.merge(path_from, path_to, options._from, options.until) whisper-1.0.2/bin/whisper-create.py0000755000000000000000000000616113131244455017206 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import signal import optparse import math try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') def byte_format(num): for x in ['bytes','KB','MB']: if num < 1024.0: return "%.3f%s" % (num, x) num /= 1024.0 return "%.3f%s" % (num, 'GB') # Ignore SIGPIPE try: signal.signal(signal.SIGPIPE, signal.SIG_DFL) except AttributeError: #OS=windows pass option_parser = optparse.OptionParser( usage='''%prog path timePerPoint:timeToStore [timePerPoint:timeToStore]* %prog --estimate timePerPoint:timeToStore [timePerPoint:timeToStore]* timePerPoint and timeToStore specify lengths of time, for example: 60:1440 60 seconds per datapoint, 1440 datapoints = 1 day of retention 15m:8 15 minutes per datapoint, 8 datapoints = 2 hours of retention 1h:7d 1 hour per datapoint, 7 days of retention 12h:2y 12 hours per datapoint, 2 years of retention ''') option_parser.add_option('--xFilesFactor', default=0.5, type='float') option_parser.add_option('--aggregationMethod', default='average', type='string', help="Function to use when aggregating values (%s)" % ', '.join(whisper.aggregationMethods)) option_parser.add_option('--overwrite', default=False, action='store_true') option_parser.add_option('--estimate', default=False, action='store_true', help="Don't create a whisper file, estimate storage requirements based on archive definitions") option_parser.add_option('--sparse', default=False, action='store_true', help="Create new whisper as sparse file") option_parser.add_option('--fallocate', default=False, action='store_true', help="Create new whisper and use fallocate") (options, args) = option_parser.parse_args() if options.estimate: if len(args) == 0: option_parser.print_usage() sys.exit(1) if len(args) == 1 and args[0].find(",") > 0: args = args[0].split(",") archives = 0 total_points = 0 for (precision, points) in map(whisper.parseRetentionDef, args): print("Archive %s: %s points of %ss precision" % (archives, points, precision)) archives += 1 total_points += points size = 16 + (archives * 12) + (total_points * 12) disk_size = int(math.ceil(size / 4096.0) * 4096) print("\nEstimated Whisper DB Size: %s (%s bytes on disk with 4k blocks)\n" % (byte_format(size), disk_size)) for x in [1, 5, 10, 50, 100, 500]: print("Estimated storage requirement for %sk metrics: %s" % (x, byte_format(x * 1000 * disk_size))) sys.exit(0) if len(args) < 2: option_parser.print_help() sys.exit(1) path = args[0] archives = [whisper.parseRetentionDef(retentionDef) for retentionDef in args[1:]] if os.path.exists(path) and options.overwrite: print('Overwriting existing file: %s' % path) os.unlink(path) try: whisper.create(path, archives, xFilesFactor=options.xFilesFactor, aggregationMethod=options.aggregationMethod, sparse=options.sparse, useFallocate=options.fallocate) except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) size = os.stat(path).st_size print('Created: %s (%d bytes)' % (path,size)) whisper-1.0.2/bin/whisper-info.py0000755000000000000000000000256213131244455016677 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import signal import optparse import json try: import whisper except ImportError: raise 
SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE try: signal.signal(signal.SIGPIPE, signal.SIG_DFL) except AttributeError: #OS=windows pass option_parser = optparse.OptionParser(usage='''%prog [options] path [field]''') option_parser.add_option('--json', default=False, action='store_true', help="Output results in JSON form") (options, args) = option_parser.parse_args() if len(args) < 1: option_parser.print_help() sys.exit(1) path = args[0] if len(args) > 1: field = args[1] else: field = None try: info = whisper.info(path) except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) info['fileSize'] = os.stat(path).st_size if field: if field not in info: print('Unknown field "%s". Valid fields are %s' % (field, ','.join(info))) sys.exit(1) print(info[field]) sys.exit(0) if options.json: print(json.dumps(info, indent=2, separators=(',', ': '))) else: archives = info.pop('archives') for key,value in info.items(): print('%s: %s' % (key,value)) print('') for i,archive in enumerate(archives): print('Archive %d' % i) for key,value in archive.items(): print('%s: %s' % (key,value)) print('') whisper-1.0.2/bin/whisper-diff.py0000755000000000000000000000737213131244455016660 0ustar rootroot00000000000000#!/usr/bin/python -tt import sys import time import optparse import json try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') option_parser = optparse.OptionParser(usage='''%prog [options] path_a path_b''') option_parser.add_option('--summary', default=False, action='store_true', help="show summary of differences") option_parser.add_option('--ignore-empty', default=False, action='store_true', help="skip comparison if either value is undefined") option_parser.add_option('--columns', default=False, action='store_true', help="print output in simple columns") option_parser.add_option('--no-headers', default=False, action='store_true', help="do not print column headers") option_parser.add_option('--until', default=None, type='int', help="Unix epoch time of the end of your requested interval (default: None)") option_parser.add_option('--json', default=False, action='store_true', help="Output results in JSON form") (options, args) = option_parser.parse_args() if len(args) != 2: option_parser.print_help() sys.exit(1) (path_a,path_b) = args[0::1] if options.until: until_time = int( options.until ) else: until_time = None def print_diffs(diffs,pretty=True,headers=True): if pretty: h = "%7s %11s %13s %13s\n" f = "%7s %11d %13s %13s\n" else: h = "%s %s %s %s\n" f = "%s %d %s %s\n" if headers: sys.stdout.write(h%('archive','timestamp','value_a','value_b')) for archive, points, total in diffs: count = count=points.__len__() if pretty: sys.stdout.write('Archive %d (%d of %d datapoints differ)\n'%(archive,points.__len__(),total)) sys.stdout.write(h%('','timestamp','value_a','value_b')) for p in points: if pretty: sys.stdout.write(f%('',p[0],p[1],p[2])) else: sys.stdout.write(f%(archive,p[0],p[1],p[2])) def print_summary(diffs,pretty=True,headers=True): if pretty: f = "%7s %9s %9s\n" else: f = "%s %s %s\n" if headers: sys.stdout.write(f%('archive','total','differing')) for archive, points, total in diffs: sys.stdout.write(f%(archive,total,points.__len__())) def print_summary_json(diffs,path_a,path_b): print json.dumps({'path_a': path_a, 'path_b': path_b, 'archives': [{'archive': archive, 'total': total, 'points': points.__len__()} for archive, points, total in diffs]}, sort_keys=True, indent=2, 
separators=(',', ' : ')) def print_diffs_json(diffs,path_a,path_b): print json.dumps({'path_a': path_a, 'path_b': path_b, 'archives': [{'archive': archive, 'total': total, 'points': points.__len__(), 'datapoint': [{'timestamp': p[0], 'value_a': p[1], 'value_b': p[2]} for p in points]} for archive, points, total in diffs]}, sort_keys=True, indent=2, separators=(',', ' : ')) def main(): archive_diffs = whisper.diff(path_a,path_b,ignore_empty=options.ignore_empty,until_time=until_time) if options.summary: if options.json: print_summary_json(archive_diffs,path_a,path_b) else: print_summary(archive_diffs,pretty=(not options.columns),headers=(not options.no_headers)) else: if options.json: print_diffs_json(archive_diffs,path_a,path_b) else: print_diffs(archive_diffs,pretty=(not options.columns),headers=(not options.no_headers)) if __name__ == "__main__": main() whisper-1.0.2/bin/whisper-set-xfilesfactor.py0000755000000000000000000000163513131244455021226 0ustar rootroot00000000000000#!/usr/bin/env python import sys import argparse import whisper def main(): """Set xFilesFactor for existing whisper file""" parser = argparse.ArgumentParser( description='Set xFilesFactor for existing whisper file') parser.add_argument('path', type=str, help='path to whisper file') parser.add_argument('xff', metavar='xFilesFactor', type=float, help='new xFilesFactor, a float between 0 and 1') args = parser.parse_args() try: old_xff = whisper.setXFilesFactor(args.path, args.xff) except IOError: sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % args.path) parser.print_help() sys.exit(1) except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) print('Updated xFilesFactor: %s (%s -> %s)' % (args.path, old_xff, args.xff)) if __name__ == "__main__": main() whisper-1.0.2/bin/whisper-fill.py0000755000000000000000000001072713131244455016674 0ustar rootroot00000000000000#!/usr/bin/env python # whisper-fill: unlike whisper-merge, don't overwrite data that's # already present in the target file, but instead, only add the missing # data (e.g. where the gaps in the target file are). Because no values # are overwritten, no data or precision gets lost. Also, unlike # whisper-merge, try to take the highest-precision archive to provide # the data, instead of the one with the largest retention. # Using this script, reconciliation between two replica instances can be # performed by whisper-fill-ing the data of the other replica with the # data that exists locally, without introducing the quite remarkable # gaps that whisper-merge leaves behind (filling a higher precision # archive with data from a lower precision one) # Work performed by Fabian Groffen @grobian while working at Booking.com. 
# additional patches are from https://github.com/jssjr/carbonate/ import whisper try: from whisper import operator HAS_OPERATOR = True except ImportError: HAS_OPERATOR = False import itertools import time import sys import optparse if sys.version_info >= (3, 0): xrange = range def itemgetter(*items): if HAS_OPERATOR: return operator.itemgetter(*items) else: if len(items) == 1: item = items[0] def g(obj): return obj[item] else: def g(obj): return tuple(obj[item] for item in items) return g def fill(src, dst, tstart, tstop): # fetch range start-stop from src, taking values from the highest # precision archive, thus optionally requiring multiple fetch + merges srcHeader = whisper.info(src) srcArchives = srcHeader['archives'] srcArchives.sort(key=itemgetter('retention')) # find oldest point in time, stored by both files srcTime = int(time.time()) - srcHeader['maxRetention'] if tstart < srcTime and tstop < srcTime: return # we want to retain as much precision as we can, hence we do backwards # walk in time # skip forward at max 'step' points at a time for archive in srcArchives: # skip over archives that don't have any data points rtime = time.time() - archive['retention'] if tstop <= rtime: continue untilTime = tstop fromTime = rtime if rtime > tstart else tstart (timeInfo, values) = whisper.fetch(src, fromTime, untilTime) (start, end, archive_step) = timeInfo pointsToWrite = list(itertools.ifilter( lambda points: points[1] is not None, itertools.izip(xrange(start, end, archive_step), values))) # order points by timestamp, newest first pointsToWrite.sort(key=lambda p: p[0], reverse=True) whisper.update_many(dst, pointsToWrite) tstop = fromTime # can stop when there's nothing to fetch any more if tstart == tstop: return def fill_archives(src, dst, startFrom): header = whisper.info(dst) archives = header['archives'] archives = sorted(archives, key=lambda t: t['retention']) for archive in archives: fromTime = time.time() - archive['retention'] if fromTime >= startFrom: continue (timeInfo, values) = whisper.fetch(dst, fromTime, startFrom) (start, end, step) = timeInfo gapstart = None for v in values: if not v and not gapstart: gapstart = start elif v and gapstart: # ignore single units lost if (start - gapstart) > archive['secondsPerPoint']: fill(src, dst, gapstart - step, start) gapstart = None elif gapstart and start == end - step: fill(src, dst, gapstart - step, start) start += step startFrom = fromTime def main(): option_parser = optparse.OptionParser( usage='%prog [--lock] src dst', description='copies data from src in dst, if missing') option_parser.add_option('--lock', help='Lock whisper files', default=False, action='store_true') (options, args) = option_parser.parse_args() if len(args) != 2: option_parser.print_help() sys.exit(1) if options.lock is True and whisper.CAN_LOCK: whisper.LOCK = True src = args[0] dst = args[1] startFrom = time.time() fill_archives(src, dst, startFrom) if __name__ == "__main__": main() whisper-1.0.2/bin/whisper-dump.py0000755000000000000000000000567213131244455016716 0ustar rootroot00000000000000#!/usr/bin/env python import os import mmap import struct import signal import sys import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') if sys.version_info >= (3, 0): xrange = range # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser(usage='''%prog path''') (options, args) = option_parser.parse_args() if len(args) != 1: 
option_parser.error("require one input file name") else: path = args[0] def mmap_file(filename): fd = os.open(filename, os.O_RDONLY) map = mmap.mmap(fd, os.fstat(fd).st_size, prot=mmap.PROT_READ) os.close(fd) return map def read_header(map): try: (aggregationType,maxRetention,xFilesFactor,archiveCount) = struct.unpack(whisper.metadataFormat,map[:whisper.metadataSize]) except: raise whisper.CorruptWhisperFile("Unable to unpack header") archives = [] archiveOffset = whisper.metadataSize for i in xrange(archiveCount): try: (offset, secondsPerPoint, points) = struct.unpack(whisper.archiveInfoFormat, map[archiveOffset:archiveOffset+whisper.archiveInfoSize]) except: raise whisper.CorruptWhisperFile("Unable to read archive %d metadata" % i) archiveInfo = { 'offset' : offset, 'secondsPerPoint' : secondsPerPoint, 'points' : points, 'retention' : secondsPerPoint * points, 'size' : points * whisper.pointSize, } archives.append(archiveInfo) archiveOffset += whisper.archiveInfoSize header = { 'aggregationMethod' : whisper.aggregationTypeToMethod.get(aggregationType, 'average'), 'maxRetention' : maxRetention, 'xFilesFactor' : xFilesFactor, 'archives' : archives, } return header def dump_header(header): print('Meta data:') print(' aggregation method: %s' % header['aggregationMethod']) print(' max retention: %d' % header['maxRetention']) print(' xFilesFactor: %g' % header['xFilesFactor']) print("") dump_archive_headers(header['archives']) def dump_archive_headers(archives): for i,archive in enumerate(archives): print('Archive %d info:' % i) print(' offset: %d' % archive['offset']) print(' seconds per point: %d' % archive['secondsPerPoint']) print(' points: %d' % archive['points']) print(' retention: %d' % archive['retention']) print(' size: %d' % archive['size']) print("") def dump_archives(archives): for i,archive in enumerate(archives): print('Archive %d data:' %i) offset = archive['offset'] for point in xrange(archive['points']): (timestamp, value) = struct.unpack(whisper.pointFormat, map[offset:offset+whisper.pointSize]) print('%d: %d, %10.35g' % (point, timestamp, value)) offset += whisper.pointSize print if not os.path.exists(path): raise SystemExit('[ERROR] File "%s" does not exist!' % path) map = mmap_file(path) header = read_header(map) dump_header(header) dump_archives(header['archives']) whisper-1.0.2/bin/rrd2whisper.py0000755000000000000000000001120213131244455016527 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import time import signal import optparse try: import rrdtool except ImportError as exc: raise SystemExit('[ERROR] Missing dependency: %s' % str(exc)) try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) aggregationMethods = whisper.aggregationMethods # RRD doesn't have a 'sum' or 'total' type aggregationMethods.remove('sum') option_parser = optparse.OptionParser(usage='''%prog rrd_path''') option_parser.add_option( '--xFilesFactor', help="The xFilesFactor to use in the output file. " + "Defaults to the input RRD's xFilesFactor", default=None, type='float') option_parser.add_option( '--aggregationMethod', help="The consolidation function to fetch from on input and " + "aggregationMethod to set on output. One of: %s" % ', '.join(aggregationMethods), default='average', type='string') option_parser.add_option('--destinationPath', help="Path to place created whisper file. 
Defaults to the " + "RRD file's source path.", default=None, type='string') (options, args) = option_parser.parse_args() if len(args) < 1: option_parser.print_help() sys.exit(1) rrd_path = args[0] try: rrd_info = rrdtool.info(rrd_path) except rrdtool.error as exc: raise SystemExit('[ERROR] %s' % str(exc)) seconds_per_pdp = rrd_info['step'] # Reconcile old vs new python-rrdtool APIs (yuck) # leave consistent 'rras' and 'datasources' lists if 'rra' in rrd_info: rras = rrd_info['rra'] else: rra_indices = [] for key in rrd_info: if key.startswith('rra['): index = int(key.split('[')[1].split(']')[0]) rra_indices.append(index) rra_count = max(rra_indices) + 1 rras = [] for i in range(rra_count): rra_info = {} rra_info['pdp_per_row'] = rrd_info['rra[%d].pdp_per_row' % i] rra_info['rows'] = rrd_info['rra[%d].rows' % i] rra_info['cf'] = rrd_info['rra[%d].cf' % i] rra_info['xff'] = rrd_info['rra[%d].xff' % i] rras.append(rra_info) if 'ds' in rrd_info: datasources = rrd_info['ds'].keys() else: ds_keys = [key for key in rrd_info if key.startswith('ds[')] datasources = list(set(key[3:].split(']')[0] for key in ds_keys)) # Grab the archive configuration relevant_rras = [] for rra in rras: if rra['cf'] == options.aggregationMethod.upper(): relevant_rras.append(rra) if not relevant_rras: err = "[ERROR] Unable to find any RRAs with consolidation function: %s" % \ options.aggregationMethod.upper() raise SystemExit(err) archives = [] xFilesFactor = options.xFilesFactor for rra in relevant_rras: precision = rra['pdp_per_row'] * seconds_per_pdp points = rra['rows'] if not xFilesFactor: xFilesFactor = rra['xff'] archives.append((precision, points)) for datasource in datasources: now = int(time.time()) suffix = '_%s' % datasource if len(datasources) > 1 else '' if options.destinationPath: destination_path = options.destinationPath if not os.path.isdir(destination_path): try: os.makedirs(destination_path) except OSError as exc: # Python >2.5 if exc.errno == errno.EEXIST and os.path.isdir(destination_path): pass else: raise rrd_file = os.path.basename(rrd_path).replace('.rrd', '%s.wsp' % suffix) path = destination_path + '/' + rrd_file else: path = rrd_path.replace('.rrd', '%s.wsp' % suffix) try: whisper.create(path, archives, xFilesFactor=xFilesFactor) except whisper.InvalidConfiguration as e: raise SystemExit('[ERROR] %s' % str(e)) size = os.stat(path).st_size archiveConfig = ','.join(["%d:%d" % ar for ar in archives]) print("Created: %s (%d bytes) with archives: %s" % (path, size, archiveConfig)) print("Migrating data") archiveNumber = len(archives) - 1 for precision, points in reversed(archives): retention = precision * points endTime = now - now % precision startTime = endTime - retention (time_info, columns, rows) = rrdtool.fetch( rrd_path, options.aggregationMethod.upper(), '-r', str(precision), '-s', str(startTime), '-e', str(endTime)) column_index = list(columns).index(datasource) rows.pop() # remove the last datapoint because RRD sometimes gives funky values values = [row[column_index] for row in rows] timestamps = list(range(*time_info)) datapoints = zip(timestamps, values) datapoints = filter(lambda p: p[1] is not None, datapoints) print(' migrating %d datapoints from archive %d' % (len(datapoints), archiveNumber)) archiveNumber -= 1 whisper.update_many(path, datapoints) whisper-1.0.2/bin/whisper-fetch.py0000755000000000000000000000460013131244455017030 0ustar rootroot00000000000000#!/usr/bin/env python import sys import time import signal import optparse try: import whisper except ImportError: raise 
SystemExit('[ERROR] Please make sure whisper is installed properly') _DROP_FUNCTIONS = { 'zeroes': lambda x: x != 0, 'nulls': lambda x: x is not None, 'empty': lambda x: x != 0 and x is not None } # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int( time.time() ) yesterday = now - (60 * 60 * 24) option_parser = optparse.OptionParser(usage='''%prog [options] path''') option_parser.add_option('--from', default=yesterday, type='int', dest='_from', help=("Unix epoch time of the beginning of " "your requested interval (default: 24 hours ago)")) option_parser.add_option('--until', default=now, type='int', help="Unix epoch time of the end of your requested interval (default: now)") option_parser.add_option('--json', default=False, action='store_true', help="Output results in JSON form") option_parser.add_option('--pretty', default=False, action='store_true', help="Show human-readable timestamps instead of unix times") option_parser.add_option('--drop', choices=list(_DROP_FUNCTIONS.keys()), action='store', help="Specify 'nulls' to drop all null values. \ Specify 'zeroes' to drop all zero values. \ Specify 'empty' to drop both null and zero values") (options, args) = option_parser.parse_args() if len(args) != 1: option_parser.print_help() sys.exit(1) path = args[0] from_time = int( options._from ) until_time = int( options.until ) try: data = whisper.fetch(path, from_time, until_time) if not data: raise SystemExit('No data in selected timerange') (timeInfo, values) = data except (whisper.WhisperException, IOError) as exc: raise SystemExit('[ERROR] %s' % str(exc)) if options.drop: fcn = _DROP_FUNCTIONS.get(options.drop) values = [ x for x in values if fcn(x) ] (start,end,step) = timeInfo if options.json: values_json = str(values).replace('None','null') print('''{ "start" : %d, "end" : %d, "step" : %d, "values" : %s }''' % (start,end,step,values_json)) sys.exit(0) t = start for value in values: if options.pretty: timestr = time.ctime(t) else: timestr = str(t) if value is None: valuestr = "None" else: valuestr = "%f" % value print("%s\t%s" % (timestr,valuestr)) t += step whisper-1.0.2/PKG-INFO0000644000000000000000000000152013131245044014221 0ustar rootroot00000000000000Metadata-Version: 1.1 Name: whisper Version: 1.0.2 Summary: Fixed size round-robin style database Home-page: http://graphiteapp.org/ Author: Chris Davis Author-email: chrismd@gmail.com License: Apache Software License 2.0 Description: UNKNOWN Platform: UNKNOWN Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.6 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.2 Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy whisper-1.0.2/setup.py0000644000000000000000000000166513131244455014655 0ustar rootroot00000000000000#!/usr/bin/env python import os from glob import glob from distutils.core import setup setup( name='whisper', version='1.0.2', url='http://graphiteapp.org/', author='Chris Davis', author_email='chrismd@gmail.com', license='Apache Software License 2.0', description='Fixed size round-robin style database', py_modules=['whisper'], scripts=glob('bin/*') + glob('contrib/*'), classifiers=[ 
'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', ], ) whisper-1.0.2/whisper.py0000644000000000000000000010457113131244455015176 0ustar rootroot00000000000000# Copyright 2009-Present The Graphite Development Team # Copyright 2008 Orbitz WorldWide # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # This module is an implementation of the Whisper database API # Here is the basic layout of a whisper data file # # File = Header,Data # Header = Metadata,ArchiveInfo+ # Metadata = aggregationType,maxRetention,xFilesFactor,archiveCount # ArchiveInfo = Offset,SecondsPerPoint,Points # Data = Archive+ # Archive = Point+ # Point = timestamp,value import itertools import operator import os import re import struct import sys import time izip = getattr(itertools, 'izip', zip) ifilter = getattr(itertools, 'ifilter', filter) if sys.version_info >= (3, 0): xrange = range try: import fcntl CAN_LOCK = True except ImportError: CAN_LOCK = False try: import ctypes import ctypes.util CAN_FALLOCATE = True except ImportError: CAN_FALLOCATE = False try: from fadvise import posix_fadvise, POSIX_FADV_RANDOM CAN_FADVISE = True except ImportError: CAN_FADVISE = False fallocate = None if CAN_FALLOCATE: libc_name = ctypes.util.find_library('c') libc = ctypes.CDLL(libc_name) c_off64_t = ctypes.c_int64 c_off_t = ctypes.c_int if os.uname()[0] == 'FreeBSD': # offset type is 64-bit on FreeBSD 32-bit & 64-bit platforms to address files more than 2GB c_off_t = ctypes.c_int64 try: _fallocate = libc.posix_fallocate64 _fallocate.restype = ctypes.c_int _fallocate.argtypes = [ctypes.c_int, c_off64_t, c_off64_t] except AttributeError: try: _fallocate = libc.posix_fallocate _fallocate.restype = ctypes.c_int _fallocate.argtypes = [ctypes.c_int, c_off_t, c_off_t] except AttributeError: CAN_FALLOCATE = False if CAN_FALLOCATE: def _py_fallocate(fd, offset, len_): res = _fallocate(fd.fileno(), offset, len_) if res != 0: raise IOError(res, 'fallocate') fallocate = _py_fallocate del libc del libc_name LOCK = False CACHE_HEADERS = False AUTOFLUSH = False FADVISE_RANDOM = False # Buffering setting applied to all operations that do *not* require # a full scan of the file in order to minimize cache thrashing. 
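# A value of 0 passed as the third argument to open() means "unbuffered"; whisper
# mostly performs small seeks and reads, so Python's readahead buffer buys little here.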
BUFFERING = 0 __headerCache = {} longFormat = "!L" longSize = struct.calcsize(longFormat) floatFormat = "!f" floatSize = struct.calcsize(floatFormat) valueFormat = "!d" valueSize = struct.calcsize(valueFormat) pointFormat = "!Ld" pointSize = struct.calcsize(pointFormat) metadataFormat = "!2LfL" metadataSize = struct.calcsize(metadataFormat) archiveInfoFormat = "!3L" archiveInfoSize = struct.calcsize(archiveInfoFormat) aggregationTypeToMethod = dict({ 1: 'average', 2: 'sum', 3: 'last', 4: 'max', 5: 'min', 6: 'avg_zero' }) aggregationMethodToType = dict([[v, k] for k, v in aggregationTypeToMethod.items()]) aggregationMethods = aggregationTypeToMethod.values() debug = startBlock = endBlock = lambda *a, **k: None UnitMultipliers = { 'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7, 'years': 86400 * 365 } def getUnitString(s): for value in ('seconds', 'minutes', 'hours', 'days', 'weeks', 'years'): if value.startswith(s): return value raise ValueError("Invalid unit '%s'" % s) def parseRetentionDef(retentionDef): try: (precision, points) = retentionDef.strip().split(':', 1) except ValueError: raise ValueError("Invalid retention definition '%s'" % retentionDef) if precision.isdigit(): precision = int(precision) * UnitMultipliers[getUnitString('s')] else: precision_re = re.compile(r'^(\d+)([a-z]+)$') match = precision_re.match(precision) if match: precision = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))] else: raise ValueError("Invalid precision specification '%s'" % precision) if points.isdigit(): points = int(points) else: points_re = re.compile(r'^(\d+)([a-z]+)$') match = points_re.match(points) if match: points = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))] // precision else: raise ValueError("Invalid retention specification '%s'" % points) return (precision, points) class WhisperException(Exception): """Base class for whisper exceptions.""" class InvalidConfiguration(WhisperException): """Invalid configuration.""" class InvalidAggregationMethod(WhisperException): """Invalid aggregation method.""" class InvalidTimeInterval(WhisperException): """Invalid time interval.""" class InvalidXFilesFactor(WhisperException): """Invalid xFilesFactor.""" class TimestampNotCovered(WhisperException): """Timestamp not covered by any archives in this database.""" class CorruptWhisperFile(WhisperException): def __init__(self, error, path): Exception.__init__(self, error) self.error = error self.path = path def __repr__(self): return "" % (self.path, self.error) def __str__(self): return "%s (%s)" % (self.error, self.path) def enableDebug(): global open, debug, startBlock, endBlock class open(file): def __init__(self, *args, **kwargs): file.__init__(self, *args, **kwargs) self.writeCount = 0 self.readCount = 0 def write(self, data): self.writeCount += 1 debug('WRITE %d bytes #%d' % (len(data), self.writeCount)) return file.write(self, data) def read(self, bytes): self.readCount += 1 debug('READ %d bytes #%d' % (bytes, self.readCount)) return file.read(self, bytes) def debug(message): print('DEBUG :: %s' % message) __timingBlocks = {} def startBlock(name): __timingBlocks[name] = time.time() def endBlock(name): debug("%s took %.5f seconds" % (name, time.time() - __timingBlocks.pop(name))) def __readHeader(fh): if CACHE_HEADERS: info = __headerCache.get(fh.name) if info: return info originalOffset = fh.tell() fh.seek(0) packedMetadata = fh.read(metadataSize) try: (aggregationType, maxRetention, xff, archiveCount) = 
struct.unpack(metadataFormat, packedMetadata) except: raise CorruptWhisperFile("Unable to read header", fh.name) try: agm = aggregationTypeToMethod[aggregationType] except: raise CorruptWhisperFile("Unable to read header", fh.name) if not 0 <= xff <= 1: raise CorruptWhisperFile("Unable to read header", fh.name) archives = [] for i in xrange(archiveCount): packedArchiveInfo = fh.read(archiveInfoSize) try: (offset, secondsPerPoint, points) = struct.unpack(archiveInfoFormat, packedArchiveInfo) except (struct.error, ValueError, TypeError): raise CorruptWhisperFile("Unable to read archive%d metadata" % i, fh.name) archiveInfo = { 'offset': offset, 'secondsPerPoint': secondsPerPoint, 'points': points, 'retention': secondsPerPoint * points, 'size': points * pointSize, } archives.append(archiveInfo) fh.seek(originalOffset) info = { 'aggregationMethod': aggregationTypeToMethod.get(aggregationType, 'average'), 'maxRetention': maxRetention, 'xFilesFactor': xff, 'archives': archives, } if CACHE_HEADERS: __headerCache[fh.name] = info return info def setXFilesFactor(path, xFilesFactor): """Sets the xFilesFactor for file in path path is a string pointing to a whisper file xFilesFactor is a float between 0 and 1 returns the old xFilesFactor """ (_, old_xff) = __setAggregation(path, xFilesFactor=xFilesFactor) return old_xff def setAggregationMethod(path, aggregationMethod, xFilesFactor=None): """Sets the aggregationMethod for file in path path is a string pointing to the whisper file aggregationMethod specifies the method to use when propagating data (see ``whisper.aggregationMethods``) xFilesFactor specifies the fraction of data points in a propagation interval that must have known values for a propagation to occur. If None, the existing xFilesFactor in path will not be changed returns the old aggregationMethod """ (old_agm, _) = __setAggregation(path, aggregationMethod, xFilesFactor) return old_agm def __setAggregation(path, aggregationMethod=None, xFilesFactor=None): """ Set aggregationMethod and or xFilesFactor for file in path""" with open(path, 'r+b', BUFFERING) as fh: if LOCK: fcntl.flock(fh.fileno(), fcntl.LOCK_EX) info = __readHeader(fh) if xFilesFactor is None: xFilesFactor = info['xFilesFactor'] if aggregationMethod is None: aggregationMethod = info['aggregationMethod'] __writeHeaderMetadata(fh, aggregationMethod, info['maxRetention'], xFilesFactor, len(info['archives'])) if AUTOFLUSH: fh.flush() os.fsync(fh.fileno()) if CACHE_HEADERS and fh.name in __headerCache: del __headerCache[fh.name] return (info['aggregationMethod'], info['xFilesFactor']) def __writeHeaderMetadata(fh, aggregationMethod, maxRetention, xFilesFactor, archiveCount): """ Writes header metadata to fh """ try: aggregationType = aggregationMethodToType[aggregationMethod] except KeyError: raise InvalidAggregationMethod("Unrecognized aggregation method: %s" % aggregationMethod) try: xFilesFactor = float(xFilesFactor) except: raise InvalidXFilesFactor("Invalid xFilesFactor %s, not a float" % xFilesFactor) if xFilesFactor < 0 or xFilesFactor > 1: raise InvalidXFilesFactor("Invalid xFilesFactor %s, not between 0 and 1" % xFilesFactor) aggregationType = struct.pack(longFormat, aggregationType) maxRetention = struct.pack(longFormat, maxRetention) xFilesFactor = struct.pack(floatFormat, xFilesFactor) archiveCount = struct.pack(longFormat, archiveCount) packedMetadata = aggregationType + maxRetention + xFilesFactor + archiveCount fh.seek(0) fh.write(packedMetadata) def validateArchiveList(archiveList): """ Validates an archiveList. 
An ArchiveList must: 1. Have at least one archive config. Example: (60, 86400) 2. No archive may be a duplicate of another. 3. Higher precision archives' precision must evenly divide all lower precision archives' precision. 4. Lower precision archives must cover larger time intervals than higher precision archives. 5. Each archive must have at least enough points to consolidate to the next archive Returns True or False """ if not archiveList: raise InvalidConfiguration("You must specify at least one archive configuration!") archiveList.sort(key=lambda a: a[0]) # Sort by precision (secondsPerPoint) for i, archive in enumerate(archiveList): if i == len(archiveList) - 1: break nextArchive = archiveList[i+1] if not archive[0] < nextArchive[0]: raise InvalidConfiguration("A Whisper database may not be configured having " "two archives with the same precision (archive%d: %s, archive%d: %s)" % (i, archive, i + 1, nextArchive)) if nextArchive[0] % archive[0] != 0: raise InvalidConfiguration("Higher precision archives' precision " "must evenly divide all lower precision archives' precision " "(archive%d: %s, archive%d: %s)" % (i, archive[0], i + 1, nextArchive[0])) retention = archive[0] * archive[1] nextRetention = nextArchive[0] * nextArchive[1] if not nextRetention > retention: raise InvalidConfiguration("Lower precision archives must cover " "larger time intervals than higher precision archives " "(archive%d: %s seconds, archive%d: %s seconds)" % (i, retention, i + 1, nextRetention)) archivePoints = archive[1] pointsPerConsolidation = nextArchive[0] // archive[0] if not archivePoints >= pointsPerConsolidation: raise InvalidConfiguration("Each archive must have at least enough points " "to consolidate to the next archive (archive%d consolidates %d of " "archive%d's points but it has only %d total points)" % (i + 1, pointsPerConsolidation, i, archivePoints)) def create(path, archiveList, xFilesFactor=None, aggregationMethod=None, sparse=False, useFallocate=False): """create(path,archiveList,xFilesFactor=0.5,aggregationMethod='average') path is a string archiveList is a list of archives, each of which is of the form (secondsPerPoint,numberOfPoints) xFilesFactor specifies the fraction of data points in a propagation interval that must have known values for a propagation to occur aggregationMethod specifies the function to use when propagating data (see ``whisper.aggregationMethods``) """ # Set default params if xFilesFactor is None: xFilesFactor = 0.5 if aggregationMethod is None: aggregationMethod = 'average' # Validate archive configurations... validateArchiveList(archiveList) # Looks good, now we create the file and write the header if os.path.exists(path): raise InvalidConfiguration("File %s already exists!" 
% path) with open(path, 'wb', BUFFERING) as fh: try: if LOCK: fcntl.flock(fh.fileno(), fcntl.LOCK_EX) if CAN_FADVISE and FADVISE_RANDOM: posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM) oldest = max([secondsPerPoint * points for secondsPerPoint, points in archiveList]) __writeHeaderMetadata(fh, aggregationMethod, oldest, xFilesFactor, len(archiveList)) headerSize = metadataSize + (archiveInfoSize * len(archiveList)) archiveOffsetPointer = headerSize for secondsPerPoint, points in archiveList: archiveInfo = struct.pack(archiveInfoFormat, archiveOffsetPointer, secondsPerPoint, points) fh.write(archiveInfo) archiveOffsetPointer += (points * pointSize) # If configured to use fallocate and capable of fallocate use that, else # attempt sparse if configure or zero pre-allocate if sparse isn't configured. if CAN_FALLOCATE and useFallocate: remaining = archiveOffsetPointer - headerSize fallocate(fh, headerSize, remaining) elif sparse: fh.seek(archiveOffsetPointer - 1) fh.write(b'\x00') else: remaining = archiveOffsetPointer - headerSize chunksize = 16384 zeroes = b'\x00' * chunksize while remaining > chunksize: fh.write(zeroes) remaining -= chunksize fh.write(zeroes[:remaining]) if AUTOFLUSH: fh.flush() os.fsync(fh.fileno()) # Explicitly close the file to catch IOError on close() fh.close() except IOError: # if we got an IOError above, the file is either empty or half created. # Better off deleting it to avoid surprises later os.unlink(fh.name) raise def aggregate(aggregationMethod, knownValues, neighborValues=None): if aggregationMethod == 'average': return float(sum(knownValues)) / float(len(knownValues)) elif aggregationMethod == 'sum': return float(sum(knownValues)) elif aggregationMethod == 'last': return knownValues[-1] elif aggregationMethod == 'max': return max(knownValues) elif aggregationMethod == 'min': return min(knownValues) elif aggregationMethod == 'avg_zero': if not neighborValues: raise InvalidAggregationMethod("Using avg_zero without neighborValues") values = [x or 0 for x in neighborValues] return float(sum(values)) / float(len(values)) else: raise InvalidAggregationMethod("Unrecognized aggregation method %s" % aggregationMethod) def __propagate(fh, header, timestamp, higher, lower): aggregationMethod = header['aggregationMethod'] xff = header['xFilesFactor'] lowerIntervalStart = timestamp - (timestamp % lower['secondsPerPoint']) lowerIntervalEnd = lowerIntervalStart + lower['secondsPerPoint'] fh.seek(higher['offset']) packedPoint = fh.read(pointSize) (higherBaseInterval, higherBaseValue) = struct.unpack(pointFormat, packedPoint) if higherBaseInterval == 0: higherFirstOffset = higher['offset'] else: timeDistance = lowerIntervalStart - higherBaseInterval pointDistance = timeDistance // higher['secondsPerPoint'] byteDistance = pointDistance * pointSize higherFirstOffset = higher['offset'] + (byteDistance % higher['size']) higherPoints = lower['secondsPerPoint'] // higher['secondsPerPoint'] higherSize = higherPoints * pointSize relativeFirstOffset = higherFirstOffset - higher['offset'] relativeLastOffset = (relativeFirstOffset + higherSize) % higher['size'] higherLastOffset = relativeLastOffset + higher['offset'] fh.seek(higherFirstOffset) if higherFirstOffset < higherLastOffset: # We don't wrap the archive seriesString = fh.read(higherLastOffset - higherFirstOffset) else: # We do wrap the archive higherEnd = higher['offset'] + higher['size'] seriesString = fh.read(higherEnd - higherFirstOffset) fh.seek(higher['offset']) seriesString += fh.read(higherLastOffset - 
higher['offset']) # Now we unpack the series data we just read byteOrder, pointTypes = pointFormat[0], pointFormat[1:] points = len(seriesString) // pointSize seriesFormat = byteOrder + (pointTypes * points) unpackedSeries = struct.unpack(seriesFormat, seriesString) # And finally we construct a list of values neighborValues = [None] * points currentInterval = lowerIntervalStart step = higher['secondsPerPoint'] for i in xrange(0, len(unpackedSeries), 2): pointTime = unpackedSeries[i] if pointTime == currentInterval: neighborValues[i//2] = unpackedSeries[i+1] currentInterval += step # Propagate aggregateValue to propagate from neighborValues if we have enough known points knownValues = [v for v in neighborValues if v is not None] if not knownValues: return False knownPercent = float(len(knownValues)) / float(len(neighborValues)) if knownPercent >= xff: # We have enough data to propagate a value! aggregateValue = aggregate(aggregationMethod, knownValues, neighborValues) myPackedPoint = struct.pack(pointFormat, lowerIntervalStart, aggregateValue) fh.seek(lower['offset']) packedPoint = fh.read(pointSize) (lowerBaseInterval, lowerBaseValue) = struct.unpack(pointFormat, packedPoint) if lowerBaseInterval == 0: # First propagated update to this lower archive fh.seek(lower['offset']) fh.write(myPackedPoint) else: # Not our first propagated update to this lower archive timeDistance = lowerIntervalStart - lowerBaseInterval pointDistance = timeDistance // lower['secondsPerPoint'] byteDistance = pointDistance * pointSize lowerOffset = lower['offset'] + (byteDistance % lower['size']) fh.seek(lowerOffset) fh.write(myPackedPoint) return True else: return False def update(path, value, timestamp=None): """ update(path, value, timestamp=None) path is a string value is a float timestamp is either an int or float """ value = float(value) with open(path, 'r+b', BUFFERING) as fh: if CAN_FADVISE and FADVISE_RANDOM: posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM) return file_update(fh, value, timestamp) def file_update(fh, value, timestamp): if LOCK: fcntl.flock(fh.fileno(), fcntl.LOCK_EX) header = __readHeader(fh) now = int(time.time()) if timestamp is None: timestamp = now timestamp = int(timestamp) diff = now - timestamp if not ((diff < header['maxRetention']) and diff >= 0): raise TimestampNotCovered("Timestamp not covered by any archives in " "this database.") for i, archive in enumerate(header['archives']): # Find the highest-precision archive that covers timestamp if archive['retention'] < diff: continue lowerArchives = header['archives'][i+1:] # We'll pass on the update to these lower precision archives later break # First we update the highest-precision archive myInterval = timestamp - (timestamp % archive['secondsPerPoint']) myPackedPoint = struct.pack(pointFormat, myInterval, value) fh.seek(archive['offset']) packedPoint = fh.read(pointSize) (baseInterval, baseValue) = struct.unpack(pointFormat, packedPoint) if baseInterval == 0: # This file's first update fh.seek(archive['offset']) fh.write(myPackedPoint) baseInterval, baseValue = myInterval, value else: # Not our first update timeDistance = myInterval - baseInterval pointDistance = timeDistance // archive['secondsPerPoint'] byteDistance = pointDistance * pointSize myOffset = archive['offset'] + (byteDistance % archive['size']) fh.seek(myOffset) fh.write(myPackedPoint) # Now we propagate the update to lower-precision archives higher = archive for lower in lowerArchives: if not __propagate(fh, header, myInterval, higher, lower): break higher = lower 
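# Propagation stops at the first lower-precision archive whose xFilesFactor is
# not met; any archives coarser than that one are left untouched by this update.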

def update_many(path, points):
  """update_many(path,points)

  path is a string
  points is a list of (timestamp,value) points
  """
  if not points:
    return
  points = [(int(t), float(v)) for (t, v) in points]
  points.sort(key=lambda p: p[0], reverse=True)  # Order points by timestamp, newest first
  with open(path, 'r+b', BUFFERING) as fh:
    if CAN_FADVISE and FADVISE_RANDOM:
      posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
    return file_update_many(fh, points)


def file_update_many(fh, points):
  if LOCK:
    fcntl.flock(fh.fileno(), fcntl.LOCK_EX)

  header = __readHeader(fh)
  now = int(time.time())
  archives = iter(header['archives'])
  currentArchive = next(archives)
  currentPoints = []

  for point in points:
    age = now - point[0]

    while currentArchive['retention'] < age:  # We can't fit any more points in this archive
      if currentPoints:  # Commit all the points we've found that it can fit
        currentPoints.reverse()  # Put points in chronological order
        __archive_update_many(fh, header, currentArchive, currentPoints)
        currentPoints = []
      try:
        currentArchive = next(archives)
      except StopIteration:
        currentArchive = None
        break

    if not currentArchive:
      break  # Drop remaining points that don't fit in the database

    currentPoints.append(point)

  if currentArchive and currentPoints:  # Don't forget to commit after we've checked all the archives
    currentPoints.reverse()
    __archive_update_many(fh, header, currentArchive, currentPoints)

  if AUTOFLUSH:
    fh.flush()
    os.fsync(fh.fileno())


def __archive_update_many(fh, header, archive, points):
  step = archive['secondsPerPoint']
  alignedPoints = [(timestamp - (timestamp % step), value)
                   for (timestamp, value) in points]

  # Create a packed string for each contiguous sequence of points
  packedStrings = []
  previousInterval = None
  currentString = b""
  lenAlignedPoints = len(alignedPoints)
  for i in xrange(0, lenAlignedPoints):
    # Take last point in run of points with duplicate intervals
    if i+1 < lenAlignedPoints and alignedPoints[i][0] == alignedPoints[i+1][0]:
      continue
    (interval, value) = alignedPoints[i]
    if (not previousInterval) or (interval == previousInterval + step):
      currentString += struct.pack(pointFormat, interval, value)
      previousInterval = interval
    else:
      numberOfPoints = len(currentString) // pointSize
      startInterval = previousInterval - (step * (numberOfPoints-1))
      packedStrings.append((startInterval, currentString))
      currentString = struct.pack(pointFormat, interval, value)
      previousInterval = interval
  if currentString:
    numberOfPoints = len(currentString) // pointSize
    startInterval = previousInterval - (step * (numberOfPoints-1))
    packedStrings.append((startInterval, currentString))

  # Read base point and determine where our writes will start
  fh.seek(archive['offset'])
  packedBasePoint = fh.read(pointSize)
  (baseInterval, baseValue) = struct.unpack(pointFormat, packedBasePoint)
  if baseInterval == 0:  # This file's first update
    baseInterval = packedStrings[0][0]  # Use our first string as the base, so we start at the start

  # Write all of our packed strings in locations determined by the baseInterval
  for (interval, packedString) in packedStrings:
    timeDistance = interval - baseInterval
    pointDistance = timeDistance // step
    byteDistance = pointDistance * pointSize
    myOffset = archive['offset'] + (byteDistance % archive['size'])
    fh.seek(myOffset)
    archiveEnd = archive['offset'] + archive['size']
    bytesBeyond = (myOffset + len(packedString)) - archiveEnd

    if bytesBeyond > 0:
      fh.write(packedString[:-bytesBeyond])
      assert fh.tell() == archiveEnd, ("archiveEnd=%d fh.tell=%d bytesBeyond=%d len(packedString)=%d" %
                                       (archiveEnd, fh.tell(), bytesBeyond, len(packedString)))
      fh.seek(archive['offset'])
      fh.write(packedString[-bytesBeyond:])  # Safe because it can't exceed the archive (retention checking logic above)
    else:
      fh.write(packedString)

  # Now we propagate the updates to lower-precision archives
  higher = archive
  lowerArchives = [arc for arc in header['archives']
                   if arc['secondsPerPoint'] > archive['secondsPerPoint']]

  for lower in lowerArchives:
    fit = lambda i: i - (i % lower['secondsPerPoint'])
    lowerIntervals = [fit(p[0]) for p in alignedPoints]
    uniqueLowerIntervals = set(lowerIntervals)
    propagateFurther = False
    for interval in uniqueLowerIntervals:
      if __propagate(fh, header, interval, higher, lower):
        propagateFurther = True

    if not propagateFurther:
      break
    higher = lower


def info(path):
  """
  info(path)

  path is a string
  """
  try:
    with open(path, 'rb') as fh:
      return __readHeader(fh)
  except (IOError, OSError):
    pass
  return None


def fetch(path, fromTime, untilTime=None, now=None, archiveToSelect=None):
  """fetch(path, fromTime, untilTime=None, now=None, archiveToSelect=None)

  path is a string
  fromTime is an epoch time
  untilTime is also an epoch time, but defaults to now.
  now is an epoch time used as "the current time", and defaults to time.time()
  archiveToSelect is the requested granularity, but defaults to None.

  Returns a tuple of (timeInfo, valueList)
  where timeInfo is itself a tuple of (fromTime, untilTime, step)

  Returns None if no data can be returned
  """
  with open(path, 'rb') as fh:
    return file_fetch(fh, fromTime, untilTime, now, archiveToSelect)


def file_fetch(fh, fromTime, untilTime, now=None, archiveToSelect=None):
  header = __readHeader(fh)
  if now is None:
    now = int(time.time())
  if untilTime is None:
    untilTime = now
  fromTime = int(fromTime)
  untilTime = int(untilTime)

  # Here we try and be flexible and return as much data as we can.
  # If the range of data is from too far in the past or fully in the future, we
  # return nothing
  if fromTime > untilTime:
    raise InvalidTimeInterval("Invalid time interval: from time '%s' is after until time '%s'" % (fromTime, untilTime))

  oldestTime = now - header['maxRetention']
  # Range is in the future
  if fromTime > now:
    return None
  # Range is beyond retention
  if untilTime < oldestTime:
    return None
  # Range requested is partially beyond retention, adjust
  if fromTime < oldestTime:
    fromTime = oldestTime
  # Range is partially in the future, adjust
  if untilTime > now:
    untilTime = now

  diff = now - fromTime

  # Parse granularity if requested
  if archiveToSelect:
    retentionStr = str(archiveToSelect) + ":1"
    archiveToSelect = parseRetentionDef(retentionStr)[0]

  for archive in header['archives']:
    if archiveToSelect:
      if archive['secondsPerPoint'] == archiveToSelect:
        break
      archive = None
    else:
      if archive['retention'] >= diff:
        break

  if archiveToSelect and not archive:
    raise ValueError("Invalid granularity: %s" % (archiveToSelect))

  return __archive_fetch(fh, archive, fromTime, untilTime)

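
# Usage sketch (illustrative only; 'example.wsp' is a hypothetical file):
#
#   import time
#   import whisper
#
#   (timeInfo, values) = whisper.fetch('example.wsp', int(time.time()) - 3600)
#   (start, end, step) = timeInfo
#   for ts, value in zip(range(start, end, step), values):
#       print(ts, value)   # value is None for intervals with no recorded datapoint
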
def __archive_fetch(fh, archive, fromTime, untilTime):
  """
  Fetch data from a single archive. Note that checks for validity of the time
  period requested happen above this level so it's possible to wrap around the
  archive on a read and request data older than the archive's retention
  """
  fromInterval = int(fromTime - (fromTime % archive['secondsPerPoint'])) + archive['secondsPerPoint']
  untilInterval = int(untilTime - (untilTime % archive['secondsPerPoint'])) + archive['secondsPerPoint']

  if fromInterval == untilInterval:
    # Zero-length time range: always include the next point
    untilInterval += archive['secondsPerPoint']

  fh.seek(archive['offset'])
  packedPoint = fh.read(pointSize)
  (baseInterval, baseValue) = struct.unpack(pointFormat, packedPoint)

  if baseInterval == 0:
    step = archive['secondsPerPoint']
    points = (untilInterval - fromInterval) // step
    timeInfo = (fromInterval, untilInterval, step)
    valueList = [None] * points
    return (timeInfo, valueList)

  # Determine fromOffset
  timeDistance = fromInterval - baseInterval
  pointDistance = timeDistance // archive['secondsPerPoint']
  byteDistance = pointDistance * pointSize
  fromOffset = archive['offset'] + (byteDistance % archive['size'])

  # Determine untilOffset
  timeDistance = untilInterval - baseInterval
  pointDistance = timeDistance // archive['secondsPerPoint']
  byteDistance = pointDistance * pointSize
  untilOffset = archive['offset'] + (byteDistance % archive['size'])

  # Read all the points in the interval
  fh.seek(fromOffset)
  if fromOffset < untilOffset:  # If we don't wrap around the archive
    seriesString = fh.read(untilOffset - fromOffset)
  else:  # We do wrap around the archive, so we need two reads
    archiveEnd = archive['offset'] + archive['size']
    seriesString = fh.read(archiveEnd - fromOffset)
    fh.seek(archive['offset'])
    seriesString += fh.read(untilOffset - archive['offset'])

  # Now we unpack the series data we just read (anything faster than unpack?)
  byteOrder, pointTypes = pointFormat[0], pointFormat[1:]
  points = len(seriesString) // pointSize
  seriesFormat = byteOrder + (pointTypes * points)
  unpackedSeries = struct.unpack(seriesFormat, seriesString)

  # And finally we construct a list of values (optimize this!)
  valueList = [None] * points  # Pre-allocate entire list for speed
  currentInterval = fromInterval
  step = archive['secondsPerPoint']

  for i in xrange(0, len(unpackedSeries), 2):
    pointTime = unpackedSeries[i]
    if pointTime == currentInterval:
      pointValue = unpackedSeries[i+1]
      valueList[i//2] = pointValue  # In-place reassignment is faster than append()
    currentInterval += step

  timeInfo = (fromInterval, untilInterval, step)
  return (timeInfo, valueList)


def merge(path_from, path_to, time_from=None, time_to=None):
  """ Merges the data from one whisper file into another. Each file must have
  the same archive configuration. time_from and time_to can optionally be
  specified for the merge.
  """
  # Python 2.7 will allow the following commented line
  # with open(path_from, 'rb') as fh_from, open(path_to, 'rb+') as fh_to:
  # But with Python 2.6 we need to use this (I prefer not to introduce
  # contextlib.nested just for this):
  with open(path_from, 'rb') as fh_from:
    with open(path_to, 'rb+') as fh_to:
      return file_merge(fh_from, fh_to, time_from, time_to)


def file_merge(fh_from, fh_to, time_from=None, time_to=None):
  headerFrom = __readHeader(fh_from)
  headerTo = __readHeader(fh_to)
  if headerFrom['archives'] != headerTo['archives']:
    raise NotImplementedError("%s and %s archive configurations are unalike. "
                              "Resize the input before merging" % (fh_from.name, fh_to.name))

  now = int(time.time())

  if (time_to is not None):
    untilTime = time_to
  else:
    untilTime = now

  if (time_from is not None):
    fromTime = time_from
  else:
    fromTime = 0

  # Sanity check: do not mix the from/to values.
  if untilTime < fromTime:
    raise ValueError("time_to must be >= time_from")

  archives = headerFrom['archives']
  archives.sort(key=operator.itemgetter('retention'))

  for archive in archives:
    archiveFrom = fromTime
    archiveTo = untilTime
    if archiveFrom < now - archive['retention']:
      archiveFrom = now - archive['retention']
    # if untilTime is too old, skip this archive
    if archiveTo < now - archive['retention']:
      continue
    (timeInfo, values) = __archive_fetch(fh_from, archive, archiveFrom, archiveTo)
    (start, end, archive_step) = timeInfo
    pointsToWrite = list(ifilter(
      lambda points: points[1] is not None,
      izip(xrange(start, end, archive_step), values)))
    # skip if there are no points to write
    if len(pointsToWrite) == 0:
      continue
    __archive_update_many(fh_to, headerTo, archive, pointsToWrite)


def diff(path_from, path_to, ignore_empty=False, until_time=None):
  """ Compare two whisper databases. Each file must have the same archive configuration """
  with open(path_from, 'rb') as fh_from:
    with open(path_to, 'rb') as fh_to:
      return file_diff(fh_from, fh_to, ignore_empty, until_time)


def file_diff(fh_from, fh_to, ignore_empty=False, until_time=None):
  headerFrom = __readHeader(fh_from)
  headerTo = __readHeader(fh_to)
  if headerFrom['archives'] != headerTo['archives']:
    # TODO: Add specific whisper-resize commands to right size things
    raise NotImplementedError("%s and %s archive configurations are unalike. "
                              "Resize the input before diffing" % (fh_from.name, fh_to.name))

  archives = headerFrom['archives']
  archives.sort(key=operator.itemgetter('retention'))

  archive_diffs = []

  now = int(time.time())
  if until_time:
    untilTime = until_time
  else:
    untilTime = now

  for archive_number, archive in enumerate(archives):
    diffs = []
    startTime = now - archive['retention']
    (fromTimeInfo, fromValues) = __archive_fetch(fh_from, archive, startTime, untilTime)
    (toTimeInfo, toValues) = __archive_fetch(fh_to, archive, startTime, untilTime)
    (start, end, archive_step) = (min(fromTimeInfo[0], toTimeInfo[0]),
                                  max(fromTimeInfo[1], toTimeInfo[1]),
                                  min(fromTimeInfo[2], toTimeInfo[2]))
    points = map(lambda s: (s * archive_step + start, fromValues[s], toValues[s]),
                 xrange(0, (end - start) // archive_step))
    if ignore_empty:
      points = [p for p in points if p[1] is not None and p[2] is not None]
    else:
      points = [p for p in points if p[1] is not None or p[2] is not None]

    diffs = [p for p in points if p[1] != p[2]]

    archive_diffs.append((archive_number, diffs, len(points)))
    untilTime = min(startTime, untilTime)

  return archive_diffs
whisper-1.0.2/contrib/0000755000000000000000000000000013131245044014566 5ustar rootroot00000000000000whisper-1.0.2/contrib/whisper-auto-resize.py0000755000000000000000000002066713131244455021105 0ustar rootroot00000000000000
#!/usr/bin/env python
import sys, os, fnmatch
from subprocess import call
from optparse import OptionParser
from distutils.spawn import find_executable
from os.path import basename

# On Debian systems whisper-resize.py is available as whisper-resize
whisperResizeExecutable = find_executable("whisper-resize.py")
if whisperResizeExecutable is None:
    whisperResizeExecutable = find_executable("whisper-resize")
    if whisperResizeExecutable is None:
        # Probably will fail later, set it nevertheless
        whisperResizeExecutable = "whisper-resize.py"
option_parser = OptionParser(
    usage='''%prog storagePath configPath

storagePath   the path to the directory containing whisper files (CAN NOT BE A SUBDIR, use --subdir for that)
configPath    the path to your carbon config files
''', version="%prog 0.1")

option_parser.add_option(
    '--doit', default=False, action='store_true',
    help="This is not a drill, let's do it")
option_parser.add_option(
    '-q', '--quiet', default=False, action='store_true',
    help="Run quietly, suppressing informational output")
option_parser.add_option(
    '--subdir', default=None, type='string',
    help="only process a subdir of whisper files")
option_parser.add_option(
    '--carbonlib', default=None, type='string',
    help="folder where the carbon lib files are if it's not in your path already")
option_parser.add_option(
    '--whisperlib', default=None, type='string',
    help="folder where the whisper lib files are if it's not in your path already")
option_parser.add_option(
    '--confirm', default=False, action='store_true',
    help="ask for confirmation prior to resizing a whisper file")
option_parser.add_option(
    '-x', '--extra_args', default='', type='string',
    help="pass any additional arguments to the %s script" % basename(whisperResizeExecutable))

(options, args) = option_parser.parse_args()

if len(args) < 2:
    option_parser.print_help()
    sys.exit(1)

storagePath = args[0]
configPath = args[1]

# check to see if we are processing a subfolder
# we need to have a separate config option for this since
# otherwise the metric test thinks the metric is at the root
# of the storage path and can match schemas incorrectly
if options.subdir is None:
    processPath = args[0]
else:
    processPath = options.subdir

# Injecting the Whisper Lib Path if needed
if options.whisperlib is not None:
    sys.path.insert(0, options.whisperlib)

try:
    import whisper
except ImportError:
    raise SystemExit('[ERROR] Can\'t find the whisper module, try using --whisperlib to explicitly include the path')

# Injecting the Carbon Lib Path if needed
if options.carbonlib is not None:
    sys.path.insert(0, options.carbonlib)

try:
    from carbon import conf
    from carbon.conf import settings
except ImportError:
    raise SystemExit('[ERROR] Can\'t find the carbon module, try using --carbonlib to explicitly include the path')

# carbon.conf not seeing the config files so give it a nudge
settings.CONF_DIR = configPath
settings.LOCAL_DATA_DIR = storagePath

# import these once we have the settings figured out
from carbon.storage import loadStorageSchemas, loadAggregationSchemas

# Load the Defined Schemas from our config files
schemas = loadStorageSchemas()
agg_schemas = loadAggregationSchemas()


# check to see if a metric needs to be resized based on the current config
def processMetric(fullPath, schemas, agg_schemas):
    """
    method to process a given metric, and resize it if necessary

    Parameters:
        fullPath    - full path to the metric whisper file
        schemas     - carbon storage schemas loaded from config
        agg_schemas - carbon storage aggregation schemas loaded from config

    """
    schema_config_args = ''
    schema_file_args = ''
    rebuild = False
    messages = ''

    # get archive info from whisper file
    info = whisper.info(fullPath)

    # get graphite metric name from fullPath
    metric = getMetricFromPath(fullPath)

    # loop the carbon-storage schemas
    for schema in schemas:
        if schema.matches(metric):
            # returns secondsPerPoint and points for this schema in tuple format
            archive_config = [archive.getTuple() for archive in schema.archives]
            break

    # loop through the carbon-aggregation schemas
    for agg_schema in agg_schemas:
        if agg_schema.matches(metric):
            xFilesFactor, aggregationMethod = agg_schema.archives
            break

    if xFilesFactor is None:
        xFilesFactor = 0.5
    if aggregationMethod is None:
        aggregationMethod = 'average'

    # loop through the bucket tuples and convert to string format for resizing
    for retention in archive_config:
        current_schema = '%s:%s ' % (retention[0], retention[1])
        schema_config_args += current_schema

    # loop through the current files bucket sizes and convert to string format to compare for resizing
    for fileRetention in info['archives']:
        current_schema = '%s:%s ' % (fileRetention['secondsPerPoint'], fileRetention['points'])
        schema_file_args += current_schema

    # check to see if the current and configured schemas are the same or rebuild
    if (schema_config_args != schema_file_args):
        rebuild = True
        messages += 'updating Retentions from: %s to: %s \n' % (schema_file_args, schema_config_args)

    # only care about the first two decimals in the comparison since there is floaty stuff going on.
    info_xFilesFactor = "{0:.2f}".format(info['xFilesFactor'])
    str_xFilesFactor = "{0:.2f}".format(xFilesFactor)

    # check to see if the current and configured xFilesFactor are the same
    if (str_xFilesFactor != info_xFilesFactor):
        rebuild = True
        messages += '%s xFilesFactor differs real: %s should be: %s \n' % (metric, info_xFilesFactor, str_xFilesFactor)

    # check to see if the current and configured aggregationMethods are the same
    if (aggregationMethod != info['aggregationMethod']):
        rebuild = True
        messages += '%s aggregation schema differs real: %s should be: %s \n' % (metric, info['aggregationMethod'], aggregationMethod)

    # if we need to rebuild, let's do it.
    if (rebuild == True):
        cmd = '%s "%s" %s --xFilesFactor=%s --aggregationMethod=%s %s' % (whisperResizeExecutable, fullPath, options.extra_args, xFilesFactor, aggregationMethod, schema_config_args)
        if (options.quiet != True or options.confirm == True):
            print(messages)
            print(cmd)

        if (options.confirm == True):
            options.doit = confirm("Would you like to run this command? [y/n]: ")
            if (options.doit == False):
                print("Skipping command \n")

        if (options.doit == True):
            exitcode = call(cmd, shell=True)
            # if the command failed let's bail so we can take a look before proceeding
            if (exitcode > 0):
                print('Error running: %s' % (cmd))
                sys.exit(1)


def getMetricFromPath(filePath):
    """
    this method takes the full file path of a whisper file and converts it to a graphite metric name

    Parameters:
        filePath - full file path to a whisper file

    Returns a string representing the metric name
    """
    # sanitize directory since we may get a trailing slash or not, and if we don't it creates a leading .
    data_dir = os.path.normpath(settings.LOCAL_DATA_DIR) + os.sep

    # pull the data dir off and convert to the graphite metric name
    metric_name = filePath.replace(data_dir, '')
    metric_name = metric_name.replace('.wsp', '')
    metric_name = metric_name.replace('/', '.')
    return metric_name


def confirm(question, error_response='Valid options : yes or no'):
    """
    ask the user if they would like to perform the action

    Parameters:
        question       - the question you would like to ask the user to confirm.
        error_response - the message to display if an invalid option is given.
""" while True: answer = raw_input(question).lower() if answer in ('y', 'yes'): return True if answer in ('n', 'no' ): return False print(error_response) if os.path.isfile(processPath) and processPath.endswith('.wsp'): processMetric(processPath, schemas, agg_schemas) else: for root, _, files in os.walk(processPath): # we only want to deal with non-hidden whisper files for f in fnmatch.filter(files, '*.wsp'): fullpath = os.path.join(root, f) processMetric(fullpath, schemas, agg_schemas) whisper-1.0.2/contrib/whisper-auto-update.py0000755000000000000000000000374213131244455021065 0ustar rootroot00000000000000#!/usr/bin/env python import sys import time import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # update this callback to do the logic you want. # a future version could use a config while in which this fn is defined. def update_value(timestamp, value): if value is None: return value return value*1024*1024*1024 # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int( time.time() ) yesterday = now - (60 * 60 * 24) option_parser = optparse.OptionParser(usage='''%prog [options] path''') option_parser.add_option('--from', default=yesterday, type='int', dest='_from', help=("Unix epoch time of the beginning of " "your requested interval (default: 24 hours ago)")) option_parser.add_option('--until', default=now, type='int', help="Unix epoch time of the end of your requested interval (default: now)") option_parser.add_option('--pretty', default=False, action='store_true', help="Show human-readable timestamps instead of unix times") (options, args) = option_parser.parse_args() if len(args) < 1: option_parser.print_usage() sys.exit(1) path = args[0] from_time = int( options._from ) until_time = int( options.until ) try: data = whisper.fetch(path, from_time, until_time) if not data: raise SystemExit('No data in selected timerange') (timeInfo, values_old) = data except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) (start,end,step) = timeInfo t = start for value_old in values_old: value_str_old = str(value_old) value_new = update_value(t, value_old) value_str_new = str(value_new) if options.pretty: timestr = time.ctime(t) else: timestr = str(t) print("%s\t%s -> %s" % (timestr,value_str_old, value_str_new)) try: if value_new is not None: whisper.update(path, value_new, t) t += step except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) whisper-1.0.2/contrib/update-storage-times.py0000755000000000000000000001465413131244455021225 0ustar rootroot00000000000000#!/usr/bin/env python # @package update_storage_schemas.py # Correct/Update storage schemas\n # @code # # Usage example for update_storage_schemas.py # sudo ./update_storage_schemas.py --path /opt/graphite/whisper --cfg /opt/graphite/conf/schemas # @endcode import sys import os import logging import subprocess import argparse import re import time from multiprocessing import Pool, cpu_count from configobj import ConfigObj # Assuming Python 2, we'll want scandir if possible, it's much faster try: from scandir import scandir except ImportError: from os import listdir as scandir RESIZE_BIN = "/opt/graphite/bin/whisper-resize.py" INFO_BIN = "/opt/graphite/bin/whisper-info.py" LOG = logging.getLogger() LOG.setLevel(logging.INFO) SCHEMA_LIST = {} # The very basic default retentions DEFAULT_SCHEMA = {'match': re.compile('.*'), 'retentions': '1m:7d'} DEBUG = False DRY_RUN = False BASE_COMMAND 
whisper-1.0.2/contrib/update-storage-times.py0000755000000000000000000001465413131244455021215 0ustar rootroot00000000000000
#!/usr/bin/env python
# @package update_storage_schemas.py
# Correct/Update storage schemas\n
# @code
# # Usage example for update_storage_schemas.py
# sudo ./update_storage_schemas.py --path /opt/graphite/whisper --cfg /opt/graphite/conf/schemas
# @endcode
import sys
import os
import logging
import subprocess
import argparse
import re
import time
from multiprocessing import Pool, cpu_count
from configobj import ConfigObj

# Assuming Python 2, we'll want scandir if possible, it's much faster
try:
    from scandir import scandir
except ImportError:
    from os import listdir as scandir

RESIZE_BIN = "/opt/graphite/bin/whisper-resize.py"
INFO_BIN = "/opt/graphite/bin/whisper-info.py"
LOG = logging.getLogger()
LOG.setLevel(logging.INFO)
SCHEMA_LIST = {}
# The very basic default retentions
DEFAULT_SCHEMA = {'match': re.compile('.*'),
                  'retentions': '1m:7d'}
DEBUG = False
DRY_RUN = False
BASE_COMMAND = [RESIZE_BIN]
ROOT_PATH = ""


def config_schemas(cfg):
    schema_conf = ConfigObj(cfg)

    for schema in schema_conf.items():
        item = schema[1]['pattern']
        if item == '.*':
            DEFAULT_SCHEMA['retentions'] = schema[1]['retentions']
        else:
            if item[0] == '^':
                item = item[1:]
            SCHEMA_LIST[item] = {'retentions': schema[1]['retentions'],
                                 'match': re.compile(item)}


def _convert_seconds(time):
    seconds_dict = {'s': 1, 'm': 60, 'h': 3600, 'min': 60,
                    'd': 86400, 'w': 604800, 'y': 31536000}
    (points, time) = time.split(':')
    if str.isalpha(time[-1]):
        time = int(time[:-1]) * seconds_dict[time[-1]]
    return time


def _compare_retention(retention, tmp_path):
    # Get the new retention as a list of per-archive retention lengths in seconds
    new_retention = [_convert_seconds(item) for item in list(retention)]
    info_string = [INFO_BIN, tmp_path]
    cur_ret_list = subprocess.Popen(info_string, stdout=subprocess.PIPE)
    cur_ret_list = cur_ret_list.communicate()[0].split('\n')
    cur_retention = [int(line.split(':')[1]) for line in cur_ret_list
                     if 'retention' in line]
    return cur_retention == new_retention


def _find_metrics(path):
    for f in scandir(path):
        if f.is_dir(follow_symlinks=False):
            for sf in _find_metrics(f.path):
                yield sf
        else:
            if not f.is_file(follow_symlinks=False) or \
                    not f.name.endswith('.wsp'):
                continue
            yield f.path


def fix_metric(metric):
    if not SCHEMA_LIST:
        LOG.error("Didn't initialize schemas!")
        return []

    if DEBUG:
        LOG.info("Testing %s for modification" % metric)

    devnull = open(os.devnull, 'w')
    command_string = list(BASE_COMMAND) + [metric]
    retention = DEFAULT_SCHEMA['retentions']
    matching = metric[len(ROOT_PATH):]
    for schema, info in SCHEMA_LIST.iteritems():
        if info['match'].search(matching):
            retention = info['retentions']
            break
    command_string.extend(list(retention))

    if DEBUG:
        LOG.info("Created command: %s" % command_string)
    if _compare_retention(retention, metric):
        LOG.debug('%s has the same retention as before!' % metric)
        return [(False, metric)]

    if DRY_RUN:
        res = 0
    else:
        LOG.debug('Retention will be %s' % retention)
        if DEBUG:
            res = subprocess.check_call(command_string)
        else:
            res = subprocess.check_call(command_string, stdout=devnull)
        devnull.close()
        # wait for a second, so we don't kill I/O on the host
        time.sleep(0.3)

    """
    We have manual commands for every failed file from these errors, so we can
    just go through each of these errors after a completed run. There shouldn't
    be many
    """
    if res != 0:
        LOG.error('Failed to update schemas for %s' % metric)
        LOG.error('Attempted retention: %s' % retention)
        LOG.error('Attempted command string: %s' % command_string)
        return [(False, metric)]
    else:
        return [(True, metric)]


def search_and_fix(subdir):
    if not SCHEMA_LIST:
        LOG.error("Didn't initialize schemas!")
        return

    fpath = os.path.join(ROOT_PATH, subdir)
    pool = Pool(cpu_count())
    LOG.info('Creating new storage schemas for metrics under %s ...' % fpath)
    results = pool.map(fix_metric, _find_metrics(fpath), 100)
    pool.close()
    pool.join()
    return results


# Parse command line options sent to the script
def cli_opts():
    parser = argparse.ArgumentParser("Correct storage settings on multiple whisper files")
    parser.add_argument('--cfg', action='store', dest='cfg',
                        help='The storage-schemas.conf file path',
                        required=True)
    parser.add_argument('--path', action='store', dest='path',
                        help='The root path to find metrics in',
                        required=True)
    parser.add_argument('--debug', action='store_true', dest='debug',
                        help='Display debug information',
                        default=False)
    parser.add_argument('--dry-run', action='store_true', dest='dry_run',
                        help="Don't actually do anything",
                        default=False)
    parser.add_argument('--subdir', action='store', dest='subdir',
                        help="If you only want to process a particular subdir",
                        default='')
    parser.add_argument('--nobackup', action='store_true', dest='nobackup',
                        help="Passed through to whisper-resize.py, don't create a backup",
                        default=False)
    parser.add_argument('--aggregate', action='store_true', dest='aggregate',
                        help="Passed through to whisper-resize.py, roll up values",
                        default=False)
    return parser.parse_args()


if __name__ == '__main__':
    i_args = cli_opts()
    if os.getenv('USER') != 'root':
        print("You must run this script as root!")
        sys.exit(1)

    if i_args.debug:
        LOG.setLevel(logging.DEBUG)
    soh = logging.StreamHandler(sys.stdout)
    LOG.addHandler(soh)

    ROOT_PATH = i_args.path
    DEBUG = i_args.debug
    DRY_RUN = i_args.dry_run
    if i_args.nobackup:
        BASE_COMMAND.append('--nobackup')
    if i_args.aggregate:
        BASE_COMMAND.append('--aggregate')

    config_schemas(i_args.cfg)
    search_and_fix(i_args.subdir)
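
# Example invocation (illustrative only; mirrors the usage example in the
# header comment of this script, with the optional dry-run and debug flags added):
#
#   sudo ./update_storage_schemas.py --path /opt/graphite/whisper \
#       --cfg /opt/graphite/conf/schemas --dry-run --debug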