whisper-1.1.4/0000755000000000000000000000000013343335427013143 5ustar rootroot00000000000000whisper-1.1.4/bin/0000755000000000000000000000000013343335427013713 5ustar rootroot00000000000000whisper-1.1.4/bin/whisper-set-aggregation-method.py0000755000000000000000000000203313343334675022307 0ustar rootroot00000000000000#!/usr/bin/env python import sys import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE try: signal.signal(signal.SIGPIPE, signal.SIG_DFL) except AttributeError: # windows? pass option_parser = optparse.OptionParser( usage='%%prog path <%s> [xFilesFactor]' % '|'.join(whisper.aggregationMethods)) (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_help() sys.exit(1) path = args[0] aggregationMethod = args[1] xFilesFactor = None if len(args) == 3: xFilesFactor = args[2] try: oldAggregationMethod = whisper.setAggregationMethod(path, aggregationMethod, xFilesFactor) except IOError: sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % path) option_parser.print_help() sys.exit(1) except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) print('Updated aggregation method: %s (%s -> %s)' % (path, oldAggregationMethod, aggregationMethod)) whisper-1.1.4/bin/whisper-update.py0000755000000000000000000000171613343334675017242 0ustar rootroot00000000000000#!/usr/bin/env python import sys import time import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int(time.time()) option_parser = optparse.OptionParser( usage='''%prog [options] path timestamp:value [timestamp:value]*''') (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_help() sys.exit(1) path = args[0] datapoint_strings = args[1:] datapoint_strings = [point.replace('N:', '%d:' % now) for point in datapoint_strings] datapoints = [tuple(point.split(':')) for point in datapoint_strings] try: if len(datapoints) == 1: timestamp, value = datapoints[0] whisper.update(path, value, timestamp) else: whisper.update_many(path, datapoints) except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) whisper-1.1.4/bin/whisper-resize.py0000755000000000000000000001363513343334675017264 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import time import bisect import signal import optparse import traceback try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int(time.time()) option_parser = optparse.OptionParser( usage='''%prog path timePerPoint:timeToStore [timePerPoint:timeToStore]* timePerPoint and timeToStore specify lengths of time, for example: 60:1440 60 seconds per datapoint, 1440 datapoints = 1 day of retention 15m:8 15 minutes per datapoint, 8 datapoints = 2 hours of retention 1h:7d 1 hour per datapoint, 7 days of retention 12h:2y 12 hours per datapoint, 2 years of retention ''') option_parser.add_option( '--xFilesFactor', default=None, type='float', help="Change the xFilesFactor") option_parser.add_option( '--aggregationMethod', default=None, type='string', help="Change the aggregation function (%s)" % ', '.join(whisper.aggregationMethods)) option_parser.add_option( '--force', default=False, action='store_true', 
help="Perform a destructive change") option_parser.add_option( '--newfile', default=None, action='store', help="Create a new database file without removing the existing one") option_parser.add_option( '--nobackup', action='store_true', help='Delete the .bak file after successful execution') option_parser.add_option( '--aggregate', action='store_true', help='Try to aggregate the values to fit the new archive better.' ' Note that this will make things slower and use more memory.') (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_help() sys.exit(1) path = args[0] if not os.path.exists(path): sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % path) option_parser.print_help() sys.exit(1) info = whisper.info(path) new_archives = [whisper.parseRetentionDef(retentionDef) for retentionDef in args[1:]] old_archives = info['archives'] # sort by precision, lowest to highest old_archives.sort(key=lambda a: a['secondsPerPoint'], reverse=True) if options.xFilesFactor is None: xff = info['xFilesFactor'] else: xff = options.xFilesFactor if options.aggregationMethod is None: aggregationMethod = info['aggregationMethod'] else: aggregationMethod = options.aggregationMethod print('Retrieving all data from the archives') for archive in old_archives: fromTime = now - archive['retention'] + archive['secondsPerPoint'] untilTime = now timeinfo, values = whisper.fetch(path, fromTime, untilTime) archive['data'] = (timeinfo, values) if options.newfile is None: tmpfile = path + '.tmp' if os.path.exists(tmpfile): print('Removing previous temporary database file: %s' % tmpfile) os.unlink(tmpfile) newfile = tmpfile else: newfile = options.newfile print('Creating new whisper database: %s' % newfile) whisper.create(newfile, new_archives, xFilesFactor=xff, aggregationMethod=aggregationMethod) size = os.stat(newfile).st_size print('Created: %s (%d bytes)' % (newfile, size)) if options.aggregate: # This is where data will be interpolated (best effort) print('Migrating data with aggregation...') all_datapoints = [] for archive in old_archives: # Loading all datapoints into memory for fast querying timeinfo, values = archive['data'] new_datapoints = zip(range(*timeinfo), values) if all_datapoints: last_timestamp = all_datapoints[-1][0] slice_end = 0 for i, (timestamp, value) in enumerate(new_datapoints): if timestamp > last_timestamp: slice_end = i break all_datapoints += new_datapoints[i:] else: all_datapoints += new_datapoints oldtimestamps = map(lambda p: p[0], all_datapoints) oldvalues = map(lambda p: p[1], all_datapoints) print("oldtimestamps: %s" % oldtimestamps) # Simply cleaning up some used memory del all_datapoints new_info = whisper.info(newfile) new_archives = new_info['archives'] for archive in new_archives: step = archive['secondsPerPoint'] fromTime = now - archive['retention'] + now % step untilTime = now + now % step + step print("(%s,%s,%s)" % (fromTime, untilTime, step)) timepoints_to_update = range(fromTime, untilTime, step) print("timepoints_to_update: %s" % timepoints_to_update) newdatapoints = [] for tinterval in zip(timepoints_to_update[:-1], timepoints_to_update[1:]): # TODO: Setting lo= parameter for 'lefti' based on righti from previous # iteration. Obviously, this can only be done if # timepoints_to_update is always updated. Is it? 
lefti = bisect.bisect_left(oldtimestamps, tinterval[0]) righti = bisect.bisect_left(oldtimestamps, tinterval[1], lo=lefti) newvalues = oldvalues[lefti:righti] if newvalues: non_none = filter(lambda x: x is not None, newvalues) if 1.0 * len(non_none) / len(newvalues) >= xff: newdatapoints.append([tinterval[0], whisper.aggregate(aggregationMethod, non_none, newvalues)]) whisper.update_many(newfile, newdatapoints) else: print('Migrating data without aggregation...') for archive in old_archives: timeinfo, values = archive['data'] datapoints = zip(range(*timeinfo), values) datapoints = filter(lambda p: p[1] is not None, datapoints) whisper.update_many(newfile, datapoints) if options.newfile is not None: sys.exit(0) backup = path + '.bak' print('Renaming old database to: %s' % backup) os.rename(path, backup) try: print('Renaming new database to: %s' % path) os.rename(tmpfile, path) except (OSError, FileNotFoundError, PermissionError): traceback.print_exc() print('\nOperation failed, restoring backup') os.rename(backup, path) sys.exit(1) if options.nobackup: print("Unlinking backup: %s" % backup) os.unlink(backup) whisper-1.1.4/bin/find-corrupt-whisper-files.py0000644000000000000000000000344613343334675021473 0ustar rootroot00000000000000#!/usr/bin/env python # encoding: utf-8 """Find and (optionally) delete corrupt Whisper data files""" from __future__ import absolute_import, print_function, unicode_literals import argparse import os import sys import whisper def walk_dir(base_dir, delete_corrupt=False, verbose=False): for dirpath, dirnames, filenames in os.walk(base_dir): if verbose: print("Scanning %s…" % dirpath) whisper_files = (os.path.join(dirpath, i) for i in filenames if i.endswith('.wsp')) for f in whisper_files: try: info = whisper.info(f) except whisper.CorruptWhisperFile: if delete_corrupt: print('Deleting corrupt Whisper file: %s' % f, file=sys.stderr) os.unlink(f) else: print('Corrupt Whisper file: %s' % f, file=sys.stderr) continue if verbose: print('%s: %d points' % (f, sum(i['points'] for i in info.get('archives', {})))) if __name__ == "__main__": parser = argparse.ArgumentParser(description=__doc__.strip()) parser.add_argument('--delete-corrupt', default=False, action='store_true', help='Delete reported files') parser.add_argument('--verbose', default=False, action='store_true', help='Display progress info') parser.add_argument('directories', type=str, nargs='+', metavar='WHISPER_DIR', help='Directory containing Whisper files') args = parser.parse_args() for d in args.directories: d = os.path.realpath(d) if not os.path.isdir(d): parser.error("%d is not a directory!") walk_dir(d, delete_corrupt=args.delete_corrupt, verbose=args.verbose) whisper-1.1.4/bin/whisper-merge.py0000755000000000000000000000171213343334675017053 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser( usage='''%prog [options] from_path to_path''') option_parser.add_option( '--from', default=None, type='int', dest='_from', help=("Begining of interval, unix timestamp (default: epoch)")) option_parser.add_option( '--until', default=None, type='int', help="End of interval, unix timestamp (default: now)") (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_help() sys.exit(1) path_from = args[0] path_to = args[1] for 
filename in (path_from, path_to): if not os.path.exists(filename): raise SystemExit('[ERROR] File "%s" does not exist!' % filename) whisper.merge(path_from, path_to, options._from, options.until) whisper-1.1.4/bin/whisper-create.py0000755000000000000000000000652213343334675017223 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import signal import optparse import math try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') def byte_format(num): for x in ['bytes', 'KB', 'MB']: if num < 1024.0: return "%.3f%s" % (num, x) num /= 1024.0 return "%.3f%s" % (num, 'GB') # Ignore SIGPIPE try: signal.signal(signal.SIGPIPE, signal.SIG_DFL) except AttributeError: # OS=windows pass option_parser = optparse.OptionParser( usage='''%prog path timePerPoint:timeToStore [timePerPoint:timeToStore]* %prog --estimate timePerPoint:timeToStore [timePerPoint:timeToStore]* timePerPoint and timeToStore specify lengths of time, for example: 60:1440 60 seconds per datapoint, 1440 datapoints = 1 day of retention 15m:8 15 minutes per datapoint, 8 datapoints = 2 hours of retention 1h:7d 1 hour per datapoint, 7 days of retention 12h:2y 12 hours per datapoint, 2 years of retention ''') option_parser.add_option('--xFilesFactor', default=0.5, type='float') option_parser.add_option('--aggregationMethod', default='average', type='string', help="Function to use when aggregating values (%s)" % ', '.join(whisper.aggregationMethods)) option_parser.add_option('--overwrite', default=False, action='store_true') option_parser.add_option('--estimate', default=False, action='store_true', help="Don't create a whisper file, estimate storage " "requirements based on archive definitions") option_parser.add_option('--sparse', default=False, action='store_true', help="Create new whisper as sparse file") option_parser.add_option('--fallocate', default=False, action='store_true', help="Create new whisper and use fallocate") (options, args) = option_parser.parse_args() if options.estimate: if len(args) == 0: option_parser.print_usage() sys.exit(1) if len(args) == 1 and args[0].find(",") > 0: args = args[0].split(",") archives = 0 total_points = 0 for (precision, points) in map(whisper.parseRetentionDef, args): print("Archive %s: %s points of %ss precision" % (archives, points, precision)) archives += 1 total_points += points size = 16 + (archives * 12) + (total_points * 12) disk_size = int(math.ceil(size / 4096.0) * 4096) print("\nEstimated Whisper DB Size: %s (%s bytes on disk with 4k blocks)\n" % (byte_format(size), disk_size)) for x in [1, 5, 10, 50, 100, 500]: print("Estimated storage requirement for %sk metrics: %s" % (x, byte_format(x * 1000 * disk_size))) sys.exit(0) if len(args) < 2: option_parser.print_help() sys.exit(1) path = args[0] archives = [whisper.parseRetentionDef(retentionDef) for retentionDef in args[1:]] if os.path.exists(path) and options.overwrite: print('Overwriting existing file: %s' % path) os.unlink(path) try: whisper.create(path, archives, xFilesFactor=options.xFilesFactor, aggregationMethod=options.aggregationMethod, sparse=options.sparse, useFallocate=options.fallocate) except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) size = os.stat(path).st_size print('Created: %s (%d bytes)' % (path, size)) whisper-1.1.4/bin/whisper-info.py0000755000000000000000000000257013343334675016712 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import signal import optparse import json try: import 
whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE try: signal.signal(signal.SIGPIPE, signal.SIG_DFL) except AttributeError: # OS=windows pass option_parser = optparse.OptionParser(usage='''%prog [options] path [field]''') option_parser.add_option('--json', default=False, action='store_true', help="Output results in JSON form") (options, args) = option_parser.parse_args() if len(args) < 1: option_parser.print_help() sys.exit(1) path = args[0] if len(args) > 1: field = args[1] else: field = None try: info = whisper.info(path) except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) info['fileSize'] = os.stat(path).st_size if field: if field not in info: print('Unknown field "%s". Valid fields are %s' % (field, ','.join(info))) sys.exit(1) print(info[field]) sys.exit(0) if options.json: print(json.dumps(info, indent=2, separators=(',', ': '))) else: archives = info.pop('archives') for key, value in info.items(): print('%s: %s' % (key, value)) print('') for i, archive in enumerate(archives): print('Archive %d' % i) for key, value in archive.items(): print('%s: %s' % (key, value)) print('') whisper-1.1.4/bin/whisper-diff.py0000755000000000000000000001003013343334675016655 0ustar rootroot00000000000000#!/usr/bin/python -tt import sys import optparse import json try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') option_parser = optparse.OptionParser(usage='''%prog [options] path_a path_b''') option_parser.add_option('--summary', default=False, action='store_true', help="show summary of differences") option_parser.add_option('--ignore-empty', default=False, action='store_true', help="skip comparison if either value is undefined") option_parser.add_option('--columns', default=False, action='store_true', help="print output in simple columns") option_parser.add_option('--no-headers', default=False, action='store_true', help="do not print column headers") option_parser.add_option('--until', default=None, type='int', help="Unix epoch time of the end of your requested " "interval (default: None)") option_parser.add_option('--json', default=False, action='store_true', help="Output results in JSON form") (options, args) = option_parser.parse_args() if len(args) != 2: option_parser.print_help() sys.exit(1) (path_a, path_b) = args[0::1] if options.until: until_time = int(options.until) else: until_time = None def print_diffs(diffs, pretty=True, headers=True): if pretty: h = "%7s %11s %13s %13s\n" f = "%7s %11d %13s %13s\n" else: h = "%s %s %s %s\n" f = "%s %d %s %s\n" if headers: sys.stdout.write(h % ('archive', 'timestamp', 'value_a', 'value_b')) for archive, points, total in diffs: if pretty: sys.stdout.write('Archive %d (%d of %d datapoints differ)\n' % (archive, points.__len__(), total)) sys.stdout.write(h % ('', 'timestamp', 'value_a', 'value_b')) for p in points: if pretty: sys.stdout.write(f % ('', p[0], p[1], p[2])) else: sys.stdout.write(f % (archive, p[0], p[1], p[2])) def print_summary(diffs, pretty=True, headers=True): if pretty: f = "%7s %9s %9s\n" else: f = "%s %s %s\n" if headers: sys.stdout.write(f % ('archive', 'total', 'differing')) for archive, points, total in diffs: sys.stdout.write(f % (archive, total, points.__len__())) def print_summary_json(diffs, path_a, path_b): print(json.dumps({'path_a': path_a, 'path_b': path_b, 'archives': [{'archive': archive, 'total': total, 'points': points.__len__()} for archive, points, total in 
diffs]}, sort_keys=True, indent=2, separators=(',', ' : '))) def print_diffs_json(diffs, path_a, path_b): print(json.dumps({'path_a': path_a, 'path_b': path_b, 'archives': [{'archive': archive, 'total': total, 'points': points.__len__(), 'datapoint': [{ 'timestamp': p[0], 'value_a': p[1], 'value_b': p[2] } for p in points]} for archive, points, total in diffs]}, sort_keys=True, indent=2, separators=(',', ' : '))) def main(): archive_diffs = whisper.diff(path_a, path_b, ignore_empty=options.ignore_empty, until_time=until_time) if options.summary: if options.json: print_summary_json(archive_diffs, path_a, path_b) else: print_summary(archive_diffs, pretty=(not options.columns), headers=(not options.no_headers)) else: if options.json: print_diffs_json(archive_diffs, path_a, path_b) else: print_diffs(archive_diffs, pretty=(not options.columns), headers=(not options.no_headers)) if __name__ == "__main__": main() whisper-1.1.4/bin/whisper-set-xfilesfactor.py0000755000000000000000000000163613343334675021243 0ustar rootroot00000000000000#!/usr/bin/env python import sys import argparse import whisper def main(): """Set xFilesFactor for existing whisper file""" parser = argparse.ArgumentParser( description='Set xFilesFactor for existing whisper file') parser.add_argument('path', type=str, help='path to whisper file') parser.add_argument('xff', metavar='xFilesFactor', type=float, help='new xFilesFactor, a float between 0 and 1') args = parser.parse_args() try: old_xff = whisper.setXFilesFactor(args.path, args.xff) except IOError: sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % args.path) parser.print_help() sys.exit(1) except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) print('Updated xFilesFactor: %s (%s -> %s)' % (args.path, old_xff, args.xff)) if __name__ == "__main__": main() whisper-1.1.4/bin/whisper-fill.py0000755000000000000000000001075413343334675016710 0ustar rootroot00000000000000#!/usr/bin/env python # whisper-fill: unlike whisper-merge, don't overwrite data that's # already present in the target file, but instead, only add the missing # data (e.g. where the gaps in the target file are). Because no values # are overwritten, no data or precision gets lost. Also, unlike # whisper-merge, try to take the highest-precision archive to provide # the data, instead of the one with the largest retention. # Using this script, reconciliation between two replica instances can be # performed by whisper-fill-ing the data of the other replica with the # data that exists locally, without introducing the quite remarkable # gaps that whisper-merge leaves behind (filling a higher precision # archive with data from a lower precision one) # Work performed by Fabian Groffen @grobian while working at Booking.com. 
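# Example invocation (illustrative, not part of the original header): fill the
# gaps in a local replica from a peer's copy without overwriting any points
# that already exist locally:
#
#   whisper-fill.py --lock peer/metric.wsp local/metric.wsp
#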
# additional patches are from https://github.com/jssjr/carbonate/ import whisper try: from whisper import operator HAS_OPERATOR = True except ImportError: HAS_OPERATOR = False import time import sys import optparse if sys.version_info >= (3, 0): xrange = range else: from future_builtins import filter, zip def itemgetter(*items): if HAS_OPERATOR: return operator.itemgetter(*items) else: if len(items) == 1: item = items[0] def g(obj): return obj[item] else: def g(obj): return tuple(obj[item] for item in items) return g def fill(src, dst, tstart, tstop): # fetch range start-stop from src, taking values from the highest # precision archive, thus optionally requiring multiple fetch + merges srcHeader = whisper.info(src) srcArchives = srcHeader['archives'] srcArchives.sort(key=itemgetter('retention')) # find oldest point in time, stored by both files srcTime = int(time.time()) - srcHeader['maxRetention'] if tstart < srcTime and tstop < srcTime: return # we want to retain as much precision as we can, hence we do backwards # walk in time # skip forward at max 'step' points at a time for archive in srcArchives: # skip over archives that don't have any data points rtime = time.time() - archive['retention'] if tstop <= rtime: continue untilTime = tstop fromTime = rtime if rtime > tstart else tstart (timeInfo, values) = whisper.fetch(src, fromTime, untilTime) (start, end, archive_step) = timeInfo pointsToWrite = list(filter( lambda points: points[1] is not None, zip(xrange(start, end, archive_step), values))) # order points by timestamp, newest first pointsToWrite.sort(key=lambda p: p[0], reverse=True) whisper.update_many(dst, pointsToWrite) tstop = fromTime # can stop when there's nothing to fetch any more if tstart == tstop: return def fill_archives(src, dst, startFrom): header = whisper.info(dst) archives = header['archives'] archives = sorted(archives, key=lambda t: t['retention']) for archive in archives: fromTime = time.time() - archive['retention'] if fromTime >= startFrom: continue (timeInfo, values) = whisper.fetch(dst, fromTime, startFrom) (start, end, step) = timeInfo gapstart = None for v in values: if not v and not gapstart: gapstart = start elif v and gapstart: # ignore single units lost if (start - gapstart) > archive['secondsPerPoint']: fill(src, dst, gapstart - step, start) gapstart = None elif gapstart and start == end - step: fill(src, dst, gapstart - step, start) start += step startFrom = fromTime def main(): option_parser = optparse.OptionParser( usage='%prog [--lock] src dst', description='copies data from src in dst, if missing') option_parser.add_option( '--lock', help='Lock whisper files', default=False, action='store_true') (options, args) = option_parser.parse_args() if len(args) != 2: option_parser.print_help() sys.exit(1) if options.lock is True and whisper.CAN_LOCK: whisper.LOCK = True src = args[0] dst = args[1] startFrom = time.time() fill_archives(src, dst, startFrom) if __name__ == "__main__": main() whisper-1.1.4/bin/whisper-dump.py0000755000000000000000000000723713343334675016731 0ustar rootroot00000000000000#!/usr/bin/env python import os import mmap import time import struct import signal import sys import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') if sys.version_info >= (3, 0): xrange = range # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) option_parser = optparse.OptionParser(usage='''%prog path''') option_parser.add_option( '--pretty', default=False, 
action='store_true', help="Show human-readable timestamps instead of unix times") option_parser.add_option( '-t', '--time-format', action='store', type='string', dest='time_format', help='Time format to use with --pretty; see time.strftime()') (options, args) = option_parser.parse_args() if len(args) != 1: option_parser.error("require one input file name") else: path = args[0] def mmap_file(filename): fd = os.open(filename, os.O_RDONLY) map = mmap.mmap(fd, os.fstat(fd).st_size, prot=mmap.PROT_READ) os.close(fd) return map def read_header(map): try: (aggregationType, maxRetention, xFilesFactor, archiveCount) \ = struct.unpack(whisper.metadataFormat, map[:whisper.metadataSize]) except (struct.error, ValueError, TypeError): raise whisper.CorruptWhisperFile("Unable to unpack header") archives = [] archiveOffset = whisper.metadataSize for i in xrange(archiveCount): try: (offset, secondsPerPoint, points) = struct.unpack( whisper.archiveInfoFormat, map[archiveOffset:archiveOffset + whisper.archiveInfoSize] ) except (struct.error, ValueError, TypeError): raise whisper.CorruptWhisperFile("Unable to read archive %d metadata" % i) archiveInfo = { 'offset': offset, 'secondsPerPoint': secondsPerPoint, 'points': points, 'retention': secondsPerPoint * points, 'size': points * whisper.pointSize, } archives.append(archiveInfo) archiveOffset += whisper.archiveInfoSize header = { 'aggregationMethod': whisper.aggregationTypeToMethod.get(aggregationType, 'average'), 'maxRetention': maxRetention, 'xFilesFactor': xFilesFactor, 'archives': archives, } return header def dump_header(header): print('Meta data:') print(' aggregation method: %s' % header['aggregationMethod']) print(' max retention: %d' % header['maxRetention']) print(' xFilesFactor: %g' % header['xFilesFactor']) print("") dump_archive_headers(header['archives']) def dump_archive_headers(archives): for i, archive in enumerate(archives): print('Archive %d info:' % i) print(' offset: %d' % archive['offset']) print(' seconds per point: %d' % archive['secondsPerPoint']) print(' points: %d' % archive['points']) print(' retention: %d' % archive['retention']) print(' size: %d' % archive['size']) print("") def dump_archives(archives, options): for i, archive in enumerate(archives): print('Archive %d data:' % i) offset = archive['offset'] for point in xrange(archive['points']): (timestamp, value) = struct.unpack( whisper.pointFormat, map[offset:offset + whisper.pointSize] ) if options.pretty: if options.time_format: timestr = time.localtime(timestamp) timestr = time.strftime(options.time_format, timestr) else: timestr = time.ctime(timestamp) else: timestr = str(timestamp) print('%d: %s, %10.35g' % (point, timestr, value)) offset += whisper.pointSize print if not os.path.exists(path): raise SystemExit('[ERROR] File "%s" does not exist!' 
% path) map = mmap_file(path) header = read_header(map) dump_header(header) dump_archives(header['archives'], options) whisper-1.1.4/bin/rrd2whisper.py0000755000000000000000000001146113343334675016552 0ustar rootroot00000000000000#!/usr/bin/env python import errno import os import sys import time import signal import optparse try: import rrdtool except ImportError as exc: raise SystemExit('[ERROR] Missing dependency: %s' % str(exc)) try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) aggregationMethods = list(whisper.aggregationMethods) # RRD doesn't have a 'sum' or 'total' type aggregationMethods.remove('sum') # RRD doesn't have a 'absmax' type aggregationMethods.remove('absmax') # RRD doesn't have a 'absmin' type aggregationMethods.remove('absmin') option_parser = optparse.OptionParser(usage='''%prog rrd_path''') option_parser.add_option( '--xFilesFactor', help="The xFilesFactor to use in the output file. " + "Defaults to the input RRD's xFilesFactor", default=None, type='float') option_parser.add_option( '--aggregationMethod', help="The consolidation function to fetch from on input and " + "aggregationMethod to set on output. One of: %s" % ', '.join(aggregationMethods), default='average', type='string') option_parser.add_option( '--destinationPath', help="Path to place created whisper file. Defaults to the " + "RRD file's source path.", default=None, type='string') (options, args) = option_parser.parse_args() if len(args) < 1: option_parser.print_help() sys.exit(1) rrd_path = args[0] try: rrd_info = rrdtool.info(rrd_path) except rrdtool.error as exc: raise SystemExit('[ERROR] %s' % str(exc)) seconds_per_pdp = rrd_info['step'] # Reconcile old vs new python-rrdtool APIs (yuck) # leave consistent 'rras' and 'datasources' lists if 'rra' in rrd_info: rras = rrd_info['rra'] else: rra_indices = [] for key in rrd_info: if key.startswith('rra['): index = int(key.split('[')[1].split(']')[0]) rra_indices.append(index) rra_count = max(rra_indices) + 1 rras = [] for i in range(rra_count): rra_info = {} rra_info['pdp_per_row'] = rrd_info['rra[%d].pdp_per_row' % i] rra_info['rows'] = rrd_info['rra[%d].rows' % i] rra_info['cf'] = rrd_info['rra[%d].cf' % i] rra_info['xff'] = rrd_info['rra[%d].xff' % i] rras.append(rra_info) if 'ds' in rrd_info: datasources = rrd_info['ds'].keys() else: ds_keys = [key for key in rrd_info if key.startswith('ds[')] datasources = list(set(key[3:].split(']')[0] for key in ds_keys)) # Grab the archive configuration relevant_rras = [] for rra in rras: if rra['cf'] == options.aggregationMethod.upper(): relevant_rras.append(rra) if not relevant_rras: err = "[ERROR] Unable to find any RRAs with consolidation function: %s" % \ options.aggregationMethod.upper() raise SystemExit(err) archives = [] xFilesFactor = options.xFilesFactor for rra in relevant_rras: precision = rra['pdp_per_row'] * seconds_per_pdp points = rra['rows'] if not xFilesFactor: xFilesFactor = rra['xff'] archives.append((precision, points)) for datasource in datasources: now = int(time.time()) suffix = '_%s' % datasource if len(datasources) > 1 else '' if options.destinationPath: destination_path = options.destinationPath if not os.path.isdir(destination_path): try: os.makedirs(destination_path) except OSError as exc: # Python >2.5 if exc.errno == errno.EEXIST and os.path.isdir(destination_path): pass else: raise rrd_file = os.path.basename(rrd_path).replace('.rrd', '%s.wsp' % suffix) path 
= destination_path + '/' + rrd_file else: path = rrd_path.replace('.rrd', '%s.wsp' % suffix) try: whisper.create(path, archives, xFilesFactor=xFilesFactor) except whisper.InvalidConfiguration as e: raise SystemExit('[ERROR] %s' % str(e)) size = os.stat(path).st_size archiveConfig = ','.join(["%d:%d" % ar for ar in archives]) print("Created: %s (%d bytes) with archives: %s" % (path, size, archiveConfig)) print("Migrating data") archiveNumber = len(archives) - 1 for precision, points in reversed(archives): retention = precision * points endTime = now - now % precision startTime = endTime - retention (time_info, columns, rows) = rrdtool.fetch( rrd_path, options.aggregationMethod.upper(), '-r', str(precision), '-s', str(startTime), '-e', str(endTime)) column_index = list(columns).index(datasource) rows.pop() # remove the last datapoint because RRD sometimes gives funky values values = [row[column_index] for row in rows] timestamps = list(range(*time_info)) datapoints = zip(timestamps, values) datapoints = filter(lambda p: p[1] is not None, datapoints) print(' migrating %d datapoints from archive %d' % (len(datapoints), archiveNumber)) archiveNumber -= 1 whisper.update_many(path, datapoints) whisper-1.1.4/bin/whisper-fetch.py0000755000000000000000000000515113343334675017046 0ustar rootroot00000000000000#!/usr/bin/env python import sys import time import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') _DROP_FUNCTIONS = { 'zeroes': lambda x: x != 0, 'nulls': lambda x: x is not None, 'empty': lambda x: x != 0 and x is not None } # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int(time.time()) yesterday = now - (60 * 60 * 24) option_parser = optparse.OptionParser(usage='''%prog [options] path''') option_parser.add_option( '--from', default=yesterday, type='int', dest='_from', help=("Unix epoch time of the beginning of " "your requested interval (default: 24 hours ago)")) option_parser.add_option( '--until', default=now, type='int', help="Unix epoch time of the end of your requested interval (default: now)") option_parser.add_option( '--json', default=False, action='store_true', help="Output results in JSON form") option_parser.add_option( '--pretty', default=False, action='store_true', help="Show human-readable timestamps instead of unix times") option_parser.add_option( '-t', '--time-format', action='store', type='string', dest='time_format', help='Time format to use with --pretty; see time.strftime()') option_parser.add_option( '--drop', choices=list(_DROP_FUNCTIONS.keys()), action='store', help="Specify 'nulls' to drop all null values. " "Specify 'zeroes' to drop all zero values. 
" "Specify 'empty' to drop both null and zero values") (options, args) = option_parser.parse_args() if len(args) != 1: option_parser.print_help() sys.exit(1) path = args[0] from_time = int(options._from) until_time = int(options.until) try: data = whisper.fetch(path, from_time, until_time) if not data: raise SystemExit('No data in selected timerange') (timeInfo, values) = data except (whisper.WhisperException, IOError) as exc: raise SystemExit('[ERROR] %s' % str(exc)) if options.drop: fcn = _DROP_FUNCTIONS.get(options.drop) values = [x for x in values if fcn(x)] (start, end, step) = timeInfo if options.json: values_json = str(values).replace('None', 'null') print('''{ "start" : %d, "end" : %d, "step" : %d, "values" : %s }''' % (start, end, step, values_json)) sys.exit(0) t = start for value in values: if options.pretty: if options.time_format: timestr = time.strftime(options.time_format, time.localtime(t)) else: timestr = time.ctime(t) else: timestr = str(t) if value is None: valuestr = "None" else: valuestr = "%f" % value print("%s\t%s" % (timestr, valuestr)) t += step whisper-1.1.4/PKG-INFO0000644000000000000000000000135413343335427014243 0ustar rootroot00000000000000Metadata-Version: 1.1 Name: whisper Version: 1.1.4 Summary: Fixed size round-robin style database Home-page: http://graphiteapp.org/ Author: Chris Davis Author-email: chrismd@gmail.com License: Apache Software License 2.0 Description: UNKNOWN Platform: UNKNOWN Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy whisper-1.1.4/setup.py0000644000000000000000000000155513343334703014657 0ustar rootroot00000000000000#!/usr/bin/env python from glob import glob from distutils.core import setup setup( name='whisper', version='1.1.4', url='http://graphiteapp.org/', author='Chris Davis', author_email='chrismd@gmail.com', license='Apache Software License 2.0', description='Fixed size round-robin style database', py_modules=['whisper'], scripts=glob('bin/*') + glob('contrib/*'), install_requires=['six'], classifiers=[ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', ], ) whisper-1.1.4/whisper.py0000644000000000000000000010642713343334675015214 0ustar rootroot00000000000000# Copyright 2009-Present The Graphite Development Team # Copyright 2008 Orbitz WorldWide # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# This module is an implementation of the Whisper database API
# Here is the basic layout of a whisper data file
#
# File = Header,Data
# Header = Metadata,ArchiveInfo+
# Metadata = aggregationType,maxRetention,xFilesFactor,archiveCount
# ArchiveInfo = Offset,SecondsPerPoint,Points
# Data = Archive+
# Archive = Point+
# Point = timestamp,value

import itertools
import operator
import os
import re
import struct
import sys
import time

izip = getattr(itertools, 'izip', zip)
ifilter = getattr(itertools, 'ifilter', filter)

if sys.version_info >= (3, 0):
  xrange = range

try:
  import fcntl
  CAN_LOCK = True
except ImportError:
  CAN_LOCK = False

try:
  import ctypes
  import ctypes.util
  CAN_FALLOCATE = True
except ImportError:
  CAN_FALLOCATE = False

try:
  if sys.version_info >= (3, 0):
    from os import posix_fadvise, POSIX_FADV_RANDOM
  else:
    from fadvise import posix_fadvise, POSIX_FADV_RANDOM
  CAN_FADVISE = True
except ImportError:
  CAN_FADVISE = False

fallocate = None

if CAN_FALLOCATE:
  libc_name = ctypes.util.find_library('c')
  libc = ctypes.CDLL(libc_name)
  c_off64_t = ctypes.c_int64
  c_off_t = ctypes.c_int

  if os.uname()[0] == 'FreeBSD':
    # offset type is 64-bit on FreeBSD 32-bit & 64-bit platforms to address files more than 2GB
    c_off_t = ctypes.c_int64

  try:
    _fallocate = libc.posix_fallocate64
    _fallocate.restype = ctypes.c_int
    _fallocate.argtypes = [ctypes.c_int, c_off64_t, c_off64_t]
  except AttributeError:
    try:
      _fallocate = libc.posix_fallocate
      _fallocate.restype = ctypes.c_int
      _fallocate.argtypes = [ctypes.c_int, c_off_t, c_off_t]
    except AttributeError:
      CAN_FALLOCATE = False

  if CAN_FALLOCATE:
    def _py_fallocate(fd, offset, len_):
      res = _fallocate(fd.fileno(), offset, len_)
      if res != 0:
        raise IOError(res, 'fallocate')
    fallocate = _py_fallocate

  del libc
  del libc_name

LOCK = False
CACHE_HEADERS = False
AUTOFLUSH = False
FADVISE_RANDOM = False
# Buffering setting applied to all operations that do *not* require
# a full scan of the file in order to minimize cache thrashing.
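# Example (illustrative, not part of the original source): a process that
# embeds this module can adjust the flags above -- and BUFFERING just below --
# at import time, e.g.
#
#   import whisper
#   whisper.LOCK = True           # wrap reads/writes in fcntl.flock()
#   whisper.AUTOFLUSH = True      # flush + fsync after each update
#   whisper.CACHE_HEADERS = True  # cache parsed headers keyed by file name
#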
BUFFERING = 0 __headerCache = {} longFormat = "!L" longSize = struct.calcsize(longFormat) floatFormat = "!f" floatSize = struct.calcsize(floatFormat) valueFormat = "!d" valueSize = struct.calcsize(valueFormat) pointFormat = "!Ld" pointSize = struct.calcsize(pointFormat) metadataFormat = "!2LfL" metadataSize = struct.calcsize(metadataFormat) archiveInfoFormat = "!3L" archiveInfoSize = struct.calcsize(archiveInfoFormat) aggregationTypeToMethod = dict({ 1: 'average', 2: 'sum', 3: 'last', 4: 'max', 5: 'min', 6: 'avg_zero', 7: 'absmax', 8: 'absmin' }) aggregationMethodToType = dict([[v, k] for k, v in aggregationTypeToMethod.items()]) aggregationMethods = aggregationTypeToMethod.values() debug = startBlock = endBlock = lambda *a, **k: None UnitMultipliers = { 'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7, 'years': 86400 * 365 } def getUnitString(s): for value in ('seconds', 'minutes', 'hours', 'days', 'weeks', 'years'): if value.startswith(s): return value raise ValueError("Invalid unit '%s'" % s) def parseRetentionDef(retentionDef): try: (precision, points) = retentionDef.strip().split(':', 1) except ValueError: raise ValueError("Invalid retention definition '%s'" % retentionDef) if precision.isdigit(): precision = int(precision) * UnitMultipliers[getUnitString('s')] else: precision_re = re.compile(r'^(\d+)([a-z]+)$') match = precision_re.match(precision) if match: precision = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))] else: raise ValueError("Invalid precision specification '%s'" % precision) if points.isdigit(): points = int(points) else: points_re = re.compile(r'^(\d+)([a-z]+)$') match = points_re.match(points) if match: points = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))] // precision else: raise ValueError("Invalid retention specification '%s'" % points) return (precision, points) class WhisperException(Exception): """Base class for whisper exceptions.""" class InvalidConfiguration(WhisperException): """Invalid configuration.""" class InvalidAggregationMethod(WhisperException): """Invalid aggregation method.""" class InvalidTimeInterval(WhisperException): """Invalid time interval.""" class InvalidXFilesFactor(WhisperException): """Invalid xFilesFactor.""" class TimestampNotCovered(WhisperException): """Timestamp not covered by any archives in this database.""" class CorruptWhisperFile(WhisperException): def __init__(self, error, path): Exception.__init__(self, error) self.error = error self.path = path def __repr__(self): return "" % (self.path, self.error) def __str__(self): return "%s (%s)" % (self.error, self.path) def disableDebug(): """ Disable writing IO statistics to stdout """ global open try: open = _open except NameError: pass def enableDebug(): """ Enable writing IO statistics to stdout """ global open, _open, debug, startBlock, endBlock _open = open class open(object): def __init__(self, *args, **kwargs): self.f = _open(*args, **kwargs) self.writeCount = 0 self.readCount = 0 def __enter__(self): return self def __exit__(self, *args): self.f.close() def write(self, data): self.writeCount += 1 debug('WRITE %d bytes #%d' % (len(data), self.writeCount)) return self.f.write(data) def read(self, size): self.readCount += 1 debug('READ %d bytes #%d' % (size, self.readCount)) return self.f.read(size) def __getattr__(self, attr): return getattr(self.f, attr) def debug(message): print('DEBUG :: %s' % message) __timingBlocks = {} def startBlock(name): __timingBlocks[name] = time.time() def endBlock(name): 
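    # Report how long the named block took and discard its recorded start time
    # (counterpart to startBlock() above).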
debug("%s took %.5f seconds" % (name, time.time() - __timingBlocks.pop(name))) def __readHeader(fh): if CACHE_HEADERS: info = __headerCache.get(fh.name) if info: return info originalOffset = fh.tell() fh.seek(0) packedMetadata = fh.read(metadataSize) try: (aggregationType, maxRetention, xff, archiveCount) \ = struct.unpack(metadataFormat, packedMetadata) except (struct.error, ValueError, TypeError): raise CorruptWhisperFile("Unable to read header", fh.name) try: aggregationTypeToMethod[aggregationType] except KeyError: raise CorruptWhisperFile("Unable to read header", fh.name) if not 0 <= xff <= 1: raise CorruptWhisperFile("Unable to read header", fh.name) archives = [] for i in xrange(archiveCount): packedArchiveInfo = fh.read(archiveInfoSize) try: (offset, secondsPerPoint, points) = struct.unpack(archiveInfoFormat, packedArchiveInfo) except (struct.error, ValueError, TypeError): raise CorruptWhisperFile("Unable to read archive%d metadata" % i, fh.name) archiveInfo = { 'offset': offset, 'secondsPerPoint': secondsPerPoint, 'points': points, 'retention': secondsPerPoint * points, 'size': points * pointSize, } archives.append(archiveInfo) fh.seek(originalOffset) info = { 'aggregationMethod': aggregationTypeToMethod.get(aggregationType, 'average'), 'maxRetention': maxRetention, 'xFilesFactor': xff, 'archives': archives, } if CACHE_HEADERS: __headerCache[fh.name] = info return info def setXFilesFactor(path, xFilesFactor): """Sets the xFilesFactor for file in path path is a string pointing to a whisper file xFilesFactor is a float between 0 and 1 returns the old xFilesFactor """ (_, old_xff) = __setAggregation(path, xFilesFactor=xFilesFactor) return old_xff def setAggregationMethod(path, aggregationMethod, xFilesFactor=None): """Sets the aggregationMethod for file in path path is a string pointing to the whisper file aggregationMethod specifies the method to use when propagating data (see ``whisper.aggregationMethods``) xFilesFactor specifies the fraction of data points in a propagation interval that must have known values for a propagation to occur. 
If None, the existing xFilesFactor in path will not be changed returns the old aggregationMethod """ (old_agm, _) = __setAggregation(path, aggregationMethod, xFilesFactor) return old_agm def __setAggregation(path, aggregationMethod=None, xFilesFactor=None): """ Set aggregationMethod and or xFilesFactor for file in path""" with open(path, 'r+b', BUFFERING) as fh: if LOCK: fcntl.flock(fh.fileno(), fcntl.LOCK_EX) info = __readHeader(fh) if xFilesFactor is None: xFilesFactor = info['xFilesFactor'] if aggregationMethod is None: aggregationMethod = info['aggregationMethod'] __writeHeaderMetadata(fh, aggregationMethod, info['maxRetention'], xFilesFactor, len(info['archives'])) if AUTOFLUSH: fh.flush() os.fsync(fh.fileno()) if CACHE_HEADERS and fh.name in __headerCache: del __headerCache[fh.name] return (info['aggregationMethod'], info['xFilesFactor']) def __writeHeaderMetadata(fh, aggregationMethod, maxRetention, xFilesFactor, archiveCount): """ Writes header metadata to fh """ try: aggregationType = aggregationMethodToType[aggregationMethod] except KeyError: raise InvalidAggregationMethod("Unrecognized aggregation method: %s" % aggregationMethod) try: xFilesFactor = float(xFilesFactor) except ValueError: raise InvalidXFilesFactor("Invalid xFilesFactor %s, not a float" % xFilesFactor) if xFilesFactor < 0 or xFilesFactor > 1: raise InvalidXFilesFactor("Invalid xFilesFactor %s, not between 0 and 1" % xFilesFactor) aggregationType = struct.pack(longFormat, aggregationType) maxRetention = struct.pack(longFormat, maxRetention) xFilesFactor = struct.pack(floatFormat, xFilesFactor) archiveCount = struct.pack(longFormat, archiveCount) packedMetadata = aggregationType + maxRetention + xFilesFactor + archiveCount fh.seek(0) fh.write(packedMetadata) def validateArchiveList(archiveList): """ Validates an archiveList. An ArchiveList must: 1. Have at least one archive config. Example: (60, 86400) 2. No archive may be a duplicate of another. 3. Higher precision archives' precision must evenly divide all lower precision archives' precision. 4. Lower precision archives must cover larger time intervals than higher precision archives. 5. 
Each archive must have at least enough points to consolidate to the next archive Returns True or False """ if not archiveList: raise InvalidConfiguration("You must specify at least one archive configuration!") archiveList.sort(key=lambda a: a[0]) # Sort by precision (secondsPerPoint) for i, archive in enumerate(archiveList): if i == len(archiveList) - 1: break nextArchive = archiveList[i + 1] if not archive[0] < nextArchive[0]: raise InvalidConfiguration( "A Whisper database may not be configured having " "two archives with the same precision (archive%d: %s, archive%d: %s)" % (i, archive, i + 1, nextArchive)) if nextArchive[0] % archive[0] != 0: raise InvalidConfiguration( "Higher precision archives' precision " "must evenly divide all lower precision archives' precision " "(archive%d: %s, archive%d: %s)" % (i, archive[0], i + 1, nextArchive[0])) retention = archive[0] * archive[1] nextRetention = nextArchive[0] * nextArchive[1] if not nextRetention > retention: raise InvalidConfiguration( "Lower precision archives must cover " "larger time intervals than higher precision archives " "(archive%d: %s seconds, archive%d: %s seconds)" % (i, retention, i + 1, nextRetention)) archivePoints = archive[1] pointsPerConsolidation = nextArchive[0] // archive[0] if not archivePoints >= pointsPerConsolidation: raise InvalidConfiguration( "Each archive must have at least enough points " "to consolidate to the next archive (archive%d consolidates %d of " "archive%d's points but it has only %d total points)" % (i + 1, pointsPerConsolidation, i, archivePoints)) def create(path, archiveList, xFilesFactor=None, aggregationMethod=None, sparse=False, useFallocate=False): """create(path,archiveList,xFilesFactor=0.5,aggregationMethod='average') path is a string archiveList is a list of archives, each of which is of the form (secondsPerPoint, numberOfPoints) xFilesFactor specifies the fraction of data points in a propagation interval that must have known values for a propagation to occur aggregationMethod specifies the function to use when propagating data (see ``whisper.aggregationMethods``) """ # Set default params if xFilesFactor is None: xFilesFactor = 0.5 if aggregationMethod is None: aggregationMethod = 'average' # Validate archive configurations... validateArchiveList(archiveList) # Looks good, now we create the file and write the header if os.path.exists(path): raise InvalidConfiguration("File %s already exists!" % path) with open(path, 'wb', BUFFERING) as fh: try: if LOCK: fcntl.flock(fh.fileno(), fcntl.LOCK_EX) if CAN_FADVISE and FADVISE_RANDOM: posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM) oldest = max([secondsPerPoint * points for secondsPerPoint, points in archiveList]) __writeHeaderMetadata(fh, aggregationMethod, oldest, xFilesFactor, len(archiveList)) headerSize = metadataSize + (archiveInfoSize * len(archiveList)) archiveOffsetPointer = headerSize for secondsPerPoint, points in archiveList: archiveInfo = struct.pack(archiveInfoFormat, archiveOffsetPointer, secondsPerPoint, points) fh.write(archiveInfo) archiveOffsetPointer += (points * pointSize) # If configured to use fallocate and capable of fallocate use that, else # attempt sparse if configure or zero pre-allocate if sparse isn't configured. 
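      # Three ways to reserve the data region that follows the header
      # (archiveOffsetPointer - headerSize bytes): ask the OS via fallocate(),
      # create a sparse file by seeking to the last byte, or, by default,
      # explicitly write zero-filled 16 KiB chunks.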
if CAN_FALLOCATE and useFallocate: remaining = archiveOffsetPointer - headerSize fallocate(fh, headerSize, remaining) elif sparse: fh.seek(archiveOffsetPointer - 1) fh.write(b'\x00') else: remaining = archiveOffsetPointer - headerSize chunksize = 16384 zeroes = b'\x00' * chunksize while remaining > chunksize: fh.write(zeroes) remaining -= chunksize fh.write(zeroes[:remaining]) if AUTOFLUSH: fh.flush() os.fsync(fh.fileno()) # Explicitly close the file to catch IOError on close() fh.close() except IOError: # if we got an IOError above, the file is either empty or half created. # Better off deleting it to avoid surprises later os.unlink(fh.name) raise def aggregate(aggregationMethod, knownValues, neighborValues=None): if aggregationMethod == 'average': return float(sum(knownValues)) / float(len(knownValues)) elif aggregationMethod == 'sum': return float(sum(knownValues)) elif aggregationMethod == 'last': return knownValues[-1] elif aggregationMethod == 'max': return max(knownValues) elif aggregationMethod == 'min': return min(knownValues) elif aggregationMethod == 'avg_zero': if not neighborValues: raise InvalidAggregationMethod("Using avg_zero without neighborValues") values = [x or 0 for x in neighborValues] return float(sum(values)) / float(len(values)) elif aggregationMethod == 'absmax': return max(knownValues, key=abs) elif aggregationMethod == 'absmin': return min(knownValues, key=abs) else: raise InvalidAggregationMethod( "Unrecognized aggregation method %s" % aggregationMethod) def __propagate(fh, header, timestamp, higher, lower): aggregationMethod = header['aggregationMethod'] xff = header['xFilesFactor'] lowerIntervalStart = timestamp - (timestamp % lower['secondsPerPoint']) fh.seek(higher['offset']) packedPoint = fh.read(pointSize) (higherBaseInterval, higherBaseValue) = struct.unpack(pointFormat, packedPoint) if higherBaseInterval == 0: higherFirstOffset = higher['offset'] else: timeDistance = lowerIntervalStart - higherBaseInterval pointDistance = timeDistance // higher['secondsPerPoint'] byteDistance = pointDistance * pointSize higherFirstOffset = higher['offset'] + (byteDistance % higher['size']) higherPoints = lower['secondsPerPoint'] // higher['secondsPerPoint'] higherSize = higherPoints * pointSize relativeFirstOffset = higherFirstOffset - higher['offset'] relativeLastOffset = (relativeFirstOffset + higherSize) % higher['size'] higherLastOffset = relativeLastOffset + higher['offset'] fh.seek(higherFirstOffset) if higherFirstOffset < higherLastOffset: # We don't wrap the archive seriesString = fh.read(higherLastOffset - higherFirstOffset) else: # We do wrap the archive higherEnd = higher['offset'] + higher['size'] seriesString = fh.read(higherEnd - higherFirstOffset) fh.seek(higher['offset']) seriesString += fh.read(higherLastOffset - higher['offset']) # Now we unpack the series data we just read byteOrder, pointTypes = pointFormat[0], pointFormat[1:] points = len(seriesString) // pointSize seriesFormat = byteOrder + (pointTypes * points) unpackedSeries = struct.unpack(seriesFormat, seriesString) # And finally we construct a list of values neighborValues = [None] * points currentInterval = lowerIntervalStart step = higher['secondsPerPoint'] for i in xrange(0, len(unpackedSeries), 2): pointTime = unpackedSeries[i] if pointTime == currentInterval: neighborValues[i // 2] = unpackedSeries[i + 1] currentInterval += step # Propagate aggregateValue to propagate from neighborValues if we have enough known points knownValues = [v for v in neighborValues if v is not None] if not 
knownValues: return False knownPercent = float(len(knownValues)) / float(len(neighborValues)) if knownPercent >= xff: # We have enough data to propagate a value! aggregateValue = aggregate(aggregationMethod, knownValues, neighborValues) myPackedPoint = struct.pack(pointFormat, lowerIntervalStart, aggregateValue) fh.seek(lower['offset']) packedPoint = fh.read(pointSize) (lowerBaseInterval, lowerBaseValue) = struct.unpack(pointFormat, packedPoint) if lowerBaseInterval == 0: # First propagated update to this lower archive fh.seek(lower['offset']) fh.write(myPackedPoint) else: # Not our first propagated update to this lower archive timeDistance = lowerIntervalStart - lowerBaseInterval pointDistance = timeDistance // lower['secondsPerPoint'] byteDistance = pointDistance * pointSize lowerOffset = lower['offset'] + (byteDistance % lower['size']) fh.seek(lowerOffset) fh.write(myPackedPoint) return True else: return False def update(path, value, timestamp=None, now=None): """ update(path, value, timestamp=None) path is a string value is a float timestamp is either an int or float """ value = float(value) with open(path, 'r+b', BUFFERING) as fh: if CAN_FADVISE and FADVISE_RANDOM: posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM) return file_update(fh, value, timestamp, now) def file_update(fh, value, timestamp, now=None): if LOCK: fcntl.flock(fh.fileno(), fcntl.LOCK_EX) header = __readHeader(fh) if now is None: now = int(time.time()) if timestamp is None: timestamp = now timestamp = int(timestamp) diff = now - timestamp if not ((diff < header['maxRetention']) and diff >= 0): raise TimestampNotCovered( "Timestamp not covered by any archives in this database.") # Find the highest-precision archive that covers timestamp for i, archive in enumerate(header['archives']): if archive['retention'] < diff: continue # We'll pass on the update to these lower precision archives later lowerArchives = header['archives'][i + 1:] break # First we update the highest-precision archive myInterval = timestamp - (timestamp % archive['secondsPerPoint']) myPackedPoint = struct.pack(pointFormat, myInterval, value) fh.seek(archive['offset']) packedPoint = fh.read(pointSize) (baseInterval, baseValue) = struct.unpack(pointFormat, packedPoint) if baseInterval == 0: # This file's first update fh.seek(archive['offset']) fh.write(myPackedPoint) baseInterval = myInterval else: # Not our first update timeDistance = myInterval - baseInterval pointDistance = timeDistance // archive['secondsPerPoint'] byteDistance = pointDistance * pointSize myOffset = archive['offset'] + (byteDistance % archive['size']) fh.seek(myOffset) fh.write(myPackedPoint) # Now we propagate the update to lower-precision archives higher = archive for lower in lowerArchives: if not __propagate(fh, header, myInterval, higher, lower): break higher = lower if AUTOFLUSH: fh.flush() os.fsync(fh.fileno()) def update_many(path, points, now=None): """update_many(path,points) path is a string points is a list of (timestamp,value) points """ if not points: return points = [(int(t), float(v)) for (t, v) in points] points.sort(key=lambda p: p[0], reverse=True) # Order points by timestamp, newest first with open(path, 'r+b', BUFFERING) as fh: if CAN_FADVISE and FADVISE_RANDOM: posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM) return file_update_many(fh, points, now) def file_update_many(fh, points, now=None): if LOCK: fcntl.flock(fh.fileno(), fcntl.LOCK_EX) header = __readHeader(fh) if now is None: now = int(time.time()) archives = iter(header['archives']) currentArchive = 
next(archives) currentPoints = [] for point in points: age = now - point[0] while currentArchive['retention'] < age: # We can't fit any more points in this archive if currentPoints: # Commit all the points we've found that it can fit currentPoints.reverse() # Put points in chronological order __archive_update_many(fh, header, currentArchive, currentPoints) currentPoints = [] try: currentArchive = next(archives) except StopIteration: currentArchive = None break if not currentArchive: break # Drop remaining points that don't fit in the database currentPoints.append(point) # Don't forget to commit after we've checked all the archives if currentArchive and currentPoints: currentPoints.reverse() __archive_update_many(fh, header, currentArchive, currentPoints) if AUTOFLUSH: fh.flush() os.fsync(fh.fileno()) def __archive_update_many(fh, header, archive, points): step = archive['secondsPerPoint'] alignedPoints = [(timestamp - (timestamp % step), value) for (timestamp, value) in points] # Create a packed string for each contiguous sequence of points packedStrings = [] previousInterval = None currentString = b"" lenAlignedPoints = len(alignedPoints) for i in xrange(0, lenAlignedPoints): # Take last point in run of points with duplicate intervals if i + 1 < lenAlignedPoints and alignedPoints[i][0] == alignedPoints[i + 1][0]: continue (interval, value) = alignedPoints[i] if (not previousInterval) or (interval == previousInterval + step): currentString += struct.pack(pointFormat, interval, value) previousInterval = interval else: numberOfPoints = len(currentString) // pointSize startInterval = previousInterval - (step * (numberOfPoints - 1)) packedStrings.append((startInterval, currentString)) currentString = struct.pack(pointFormat, interval, value) previousInterval = interval if currentString: numberOfPoints = len(currentString) // pointSize startInterval = previousInterval - (step * (numberOfPoints - 1)) packedStrings.append((startInterval, currentString)) # Read base point and determine where our writes will start fh.seek(archive['offset']) packedBasePoint = fh.read(pointSize) (baseInterval, baseValue) = struct.unpack(pointFormat, packedBasePoint) if baseInterval == 0: # This file's first update baseInterval = packedStrings[0][0] # Use our first string as the base, so we start at the start # Write all of our packed strings in locations determined by the baseInterval for (interval, packedString) in packedStrings: timeDistance = interval - baseInterval pointDistance = timeDistance // step byteDistance = pointDistance * pointSize myOffset = archive['offset'] + (byteDistance % archive['size']) fh.seek(myOffset) archiveEnd = archive['offset'] + archive['size'] bytesBeyond = (myOffset + len(packedString)) - archiveEnd if bytesBeyond > 0: fh.write(packedString[:-bytesBeyond]) assert fh.tell() == archiveEnd, ( "archiveEnd=%d fh.tell=%d bytesBeyond=%d len(packedString)=%d" % (archiveEnd, fh.tell(), bytesBeyond, len(packedString)) ) fh.seek(archive['offset']) # Safe because it can't exceed the archive (retention checking logic above) fh.write(packedString[-bytesBeyond:]) else: fh.write(packedString) # Now we propagate the updates to lower-precision archives higher = archive lowerArchives = [arc for arc in header['archives'] if arc['secondsPerPoint'] > archive['secondsPerPoint']] for lower in lowerArchives: def fit(i): return i - (i % lower['secondsPerPoint']) lowerIntervals = [fit(p[0]) for p in alignedPoints] uniqueLowerIntervals = set(lowerIntervals) propagateFurther = False for interval in 
uniqueLowerIntervals: if __propagate(fh, header, interval, higher, lower): propagateFurther = True if not propagateFurther: break higher = lower def info(path): """ info(path) path is a string """ try: with open(path, 'rb') as fh: return __readHeader(fh) except (IOError, OSError): pass return None def fetch(path, fromTime, untilTime=None, now=None, archiveToSelect=None): """fetch(path,fromTime,untilTime=None,archiveToSelect=None) path is a string fromTime is an epoch time untilTime is also an epoch time, but defaults to now. archiveToSelect is the requested granularity, but defaults to None. Returns a tuple of (timeInfo, valueList) where timeInfo is itself a tuple of (fromTime, untilTime, step) Returns None if no data can be returned """ with open(path, 'rb') as fh: return file_fetch(fh, fromTime, untilTime, now, archiveToSelect) def file_fetch(fh, fromTime, untilTime, now=None, archiveToSelect=None): header = __readHeader(fh) if now is None: now = int(time.time()) if untilTime is None: untilTime = now fromTime = int(fromTime) untilTime = int(untilTime) # Here we try and be flexible and return as much data as we can. # If the range of data is from too far in the past or fully in the future, we # return nothing if fromTime > untilTime: raise InvalidTimeInterval( "Invalid time interval: from time '%s' is after until time '%s'" % (fromTime, untilTime)) oldestTime = now - header['maxRetention'] # Range is in the future if fromTime > now: return None # Range is beyond retention if untilTime < oldestTime: return None # Range requested is partially beyond retention, adjust if fromTime < oldestTime: fromTime = oldestTime # Range is partially in the future, adjust if untilTime > now: untilTime = now diff = now - fromTime # Parse granularity if requested if archiveToSelect: retentionStr = str(archiveToSelect) + ":1" archiveToSelect = parseRetentionDef(retentionStr)[0] for archive in header['archives']: if archiveToSelect: if archive['secondsPerPoint'] == archiveToSelect: break archive = None else: if archive['retention'] >= diff: break if archiveToSelect and not archive: raise ValueError("Invalid granularity: %s" % (archiveToSelect)) return __archive_fetch(fh, archive, fromTime, untilTime) def __archive_fetch(fh, archive, fromTime, untilTime): """ Fetch data from a single archive. 
Note that checks for validity of the time period requested happen above this level so it's possible to wrap around the archive on a read and request data older than the archive's retention """ step = archive['secondsPerPoint'] fromInterval = int(fromTime - (fromTime % step)) + step untilInterval = int(untilTime - (untilTime % step)) + step if fromInterval == untilInterval: # Zero-length time range: always include the next point untilInterval += step fh.seek(archive['offset']) packedPoint = fh.read(pointSize) (baseInterval, baseValue) = struct.unpack(pointFormat, packedPoint) if baseInterval == 0: points = (untilInterval - fromInterval) // step timeInfo = (fromInterval, untilInterval, step) valueList = [None] * points return (timeInfo, valueList) # Determine fromOffset timeDistance = fromInterval - baseInterval pointDistance = timeDistance // step byteDistance = pointDistance * pointSize fromOffset = archive['offset'] + (byteDistance % archive['size']) # Determine untilOffset timeDistance = untilInterval - baseInterval pointDistance = timeDistance // step byteDistance = pointDistance * pointSize untilOffset = archive['offset'] + (byteDistance % archive['size']) # Read all the points in the interval fh.seek(fromOffset) if fromOffset < untilOffset: # If we don't wrap around the archive seriesString = fh.read(untilOffset - fromOffset) else: # We do wrap around the archive, so we need two reads archiveEnd = archive['offset'] + archive['size'] seriesString = fh.read(archiveEnd - fromOffset) fh.seek(archive['offset']) seriesString += fh.read(untilOffset - archive['offset']) # Now we unpack the series data we just read (anything faster than unpack?) byteOrder, pointTypes = pointFormat[0], pointFormat[1:] points = len(seriesString) // pointSize seriesFormat = byteOrder + (pointTypes * points) unpackedSeries = struct.unpack(seriesFormat, seriesString) # And finally we construct a list of values (optimize this!) valueList = [None] * points # Pre-allocate entire list for speed currentInterval = fromInterval for i in xrange(0, len(unpackedSeries), 2): pointTime = unpackedSeries[i] if pointTime == currentInterval: pointValue = unpackedSeries[i + 1] valueList[i // 2] = pointValue # In-place reassignment is faster than append() currentInterval += step timeInfo = (fromInterval, untilInterval, step) return (timeInfo, valueList) def merge(path_from, path_to, time_from=None, time_to=None, now=None): """ Merges the data from one whisper file into another. Each file must have the same archive configuration. time_from and time_to can optionally be specified for the merge. """ # Python 2.7 will allow the following commented line # with open(path_from, 'rb') as fh_from, open(path_to, 'rb+') as fh_to: # But with Python 2.6 we need to use this (I prefer not to introduce # contextlib.nested just for this): with open(path_from, 'rb') as fh_from: with open(path_to, 'rb+') as fh_to: return file_merge(fh_from, fh_to, time_from, time_to, now) def file_merge(fh_from, fh_to, time_from=None, time_to=None, now=None): headerFrom = __readHeader(fh_from) headerTo = __readHeader(fh_to) if headerFrom['archives'] != headerTo['archives']: raise NotImplementedError( "%s and %s archive configurations are unalike. " "Resize the input before merging" % (fh_from.name, fh_to.name)) if now is None: now = int(time.time()) if (time_to is not None): untilTime = time_to else: untilTime = now if (time_from is not None): fromTime = time_from else: fromTime = 0 # Sanity check: do not mix the from/to values. 
if untilTime < fromTime: raise ValueError("time_to must be >= time_from") archives = headerFrom['archives'] archives.sort(key=operator.itemgetter('retention')) for archive in archives: archiveFrom = fromTime archiveTo = untilTime if archiveFrom < now - archive['retention']: archiveFrom = now - archive['retention'] # if untilTime is too old, skip this archive if archiveTo < now - archive['retention']: continue (timeInfo, values) = __archive_fetch(fh_from, archive, archiveFrom, archiveTo) (start, end, archive_step) = timeInfo pointsToWrite = list(ifilter( lambda points: points[1] is not None, izip(xrange(start, end, archive_step), values))) # skip if there are no points to write if len(pointsToWrite) == 0: continue __archive_update_many(fh_to, headerTo, archive, pointsToWrite) def diff(path_from, path_to, ignore_empty=False, until_time=None, now=None): """ Compare two whisper databases. Each file must have the same archive configuration """ with open(path_from, 'rb') as fh_from: with open(path_to, 'rb') as fh_to: return file_diff(fh_from, fh_to, ignore_empty, until_time, now) def file_diff(fh_from, fh_to, ignore_empty=False, until_time=None, now=None): headerFrom = __readHeader(fh_from) headerTo = __readHeader(fh_to) if headerFrom['archives'] != headerTo['archives']: # TODO: Add specific whisper-resize commands to right size things raise NotImplementedError( "%s and %s archive configurations are unalike. " "Resize the input before diffing" % (fh_from.name, fh_to.name)) archives = headerFrom['archives'] archives.sort(key=operator.itemgetter('retention')) archive_diffs = [] if now is None: now = int(time.time()) if until_time: untilTime = until_time else: untilTime = now for archive_number, archive in enumerate(archives): diffs = [] startTime = now - archive['retention'] (fromTimeInfo, fromValues) = \ __archive_fetch(fh_from, archive, startTime, untilTime) (toTimeInfo, toValues) = __archive_fetch(fh_to, archive, startTime, untilTime) (start, end, archive_step) = \ (min(fromTimeInfo[0], toTimeInfo[0]), max(fromTimeInfo[1], toTimeInfo[1]), min(fromTimeInfo[2], toTimeInfo[2])) points = map(lambda s: (s * archive_step + start, fromValues[s], toValues[s]), xrange(0, (end - start) // archive_step)) if ignore_empty: points = [p for p in points if p[1] is not None and p[2] is not None] else: points = [p for p in points if p[1] is not None or p[2] is not None] diffs = [p for p in points if p[1] != p[2]] archive_diffs.append((archive_number, diffs, points.__len__())) untilTime = min(startTime, untilTime) return archive_diffs whisper-1.1.4/test_whisper.py0000755000000000000000000010101213343334675016237 0ustar rootroot00000000000000#!/usr/bin/env python import os import sys import time import math import random import struct import errno from six.moves import StringIO from six import assertRegex try: from unittest.mock import patch, mock_open except ImportError: from mock import patch, mock_open try: import unittest2 as unittest except ImportError: import unittest # For py3k in TestWhisper.test_merge try: FileNotFoundError # noqa except NameError: class FileNotFoundError(Exception): pass import whisper class SimulatedCorruptWhisperFile(object): """ Simple context manager to be used as a decorator for simulating a corrupt whisper file for testing purposes. Example: >>> whisper.create('test.wsp', [(60, 10)]) >>> with SimulatedCorruptWhisperFile(): ... whisper.info('test.wsp') When 'corrupt_archive' is passed as True, the metadata will be left intact, but the archive will seem corrupted. 
""" def __init__(self, corrupt_archive=False): self.corrupt_archive = corrupt_archive self.metadataFormat = whisper.metadataFormat self.archiveInfoFormat = whisper.archiveInfoFormat self.CACHE_HEADERS = whisper.CACHE_HEADERS def __enter__(self): # Force the struct unpack to fail by changing the metadata # format. This simulates an actual corrupted whisper file if not self.corrupt_archive: whisper.metadataFormat = '!ssss' else: whisper.archiveInfoFormat = '!ssss' # Force whisper to reread the header instead of returning # the previous (correct) header from the header cache whisper.CACHE_HEADERS = False def __exit__(self, *args, **kwargs): whisper.metadataFormat = self.metadataFormat whisper.archiveInfoFormat = self.archiveInfoFormat whisper.CACHE_HEADERS = self.CACHE_HEADERS class AssertRaisesException(object): """ Context manager to not only assert the type of exception raised, but also the actual value of the exception matches what is expected >>> with AssertRaisesException(ValueError('beer > wine')): ... raise ValueError('beer > wine') This is better than unittest.TestCase.assertRaises as it also checks the contents of the exception vs just the type raised. """ def __init__(self, exc): self.expected_exc = exc def __enter__(self): yield def __exit__(self, e_type, e_value, tracebck): # Ensure an exception was actually raised if e_type is None: raise AssertionError("Exception of type '{}' was not raised".format( self.expected_exc.__class__.__name__, )) elif not isinstance(self.expected_exc, e_type): raise AssertionError("Exception type '{}' is not of type '{}'".format( getattr(e_type, '__name__', 'None'), self.expected_exc.__class__.__name__, )) # Ensure the actual values are the exact same. Since # two instances of an arbitrary exception will never # be considered equal, use the __dict__ attr to check # that all of the kwargs such as path for exceptions # such as CorruptWhisperFile are the exact same. elif e_value.__dict__ != self.expected_exc.__dict__: raise AssertionError("'{}' != '{}'".format( repr(self.expected_exc.__dict__), repr(e_value.__dict__), )) # Some builtin exceptions such as ValueError return {} for # ValueError.__dict__, so finally, cast those to strings to compare elif str(e_value) != str(self.expected_exc): raise AssertionError("String forms of: '{}' != '{}'".format( str(self.expected_exc), str(e_value), )) # Context managers need to return True in __exit__ to not # re-raise the exception held in the e_value variable return True class WhisperTestBase(unittest.TestCase): def setUp(self): self.filename = 'db.wsp' self.retention = [(1, 60), (60, 60)] def tearDown(self): self._remove(self.filename) @staticmethod def _remove(wsp_file): try: os.unlink(wsp_file) except (IOError, OSError, FileNotFoundError): pass class TestWhisper(WhisperTestBase): """ Testing functions for whisper. 
""" def test_validate_archive_list(self): """ blank archive config """ with AssertRaisesException( whisper.InvalidConfiguration( 'You must specify at least one archive configuration!')): whisper.validateArchiveList([]) def test_duplicate(self): """ Checking duplicates """ # TODO: Fix the lies with whisper.validateArchiveList() saying it returns True/False self.assertIsNone(whisper.validateArchiveList(self.retention)) with AssertRaisesException( whisper.InvalidConfiguration( 'A Whisper database may not be configured having two ' 'archives with the same precision (archive0: (1, 60), ' 'archive1: (1, 60))')): whisper.validateArchiveList([(1, 60), (60, 60), (1, 60)]) def test_even_precision_division(self): """ even precision division """ whisper.validateArchiveList([(60, 60), (6, 60)]) with AssertRaisesException( whisper.InvalidConfiguration( "Higher precision archives' precision must evenly divide " "all lower precision archives' precision (archive0: 7, " "archive1: 60)")): whisper.validateArchiveList([(60, 60), (7, 60)]) def test_timespan_coverage(self): """ timespan coverage """ whisper.validateArchiveList(self.retention) with AssertRaisesException( whisper.InvalidConfiguration( 'Lower precision archives must cover larger time intervals ' 'than higher precision archives (archive0: 60 seconds, ' 'archive1: 10 seconds)')): whisper.validateArchiveList([(1, 60), (10, 1)]) def test_number_of_points(self): """ number of points """ whisper.validateArchiveList(self.retention) with AssertRaisesException( whisper.InvalidConfiguration( "Each archive must have at least enough points to " "consolidate to the next archive (archive1 consolidates 60 " "of archive0's points but it has only 30 total points)")): whisper.validateArchiveList([(1, 30), (60, 60)]) def test_aggregate(self): """ aggregate functions """ # min of 1-4 self.assertEqual(whisper.aggregate('min', [1, 2, 3, 4]), 1) # max of 1-4 self.assertEqual(whisper.aggregate('max', [1, 2, 3, 4]), 4) # last element in the known values self.assertEqual(whisper.aggregate('last', [3, 2, 5, 4]), 4) # sum ALL THE VALUES! 
self.assertEqual(whisper.aggregate('sum', [10, 2, 3, 4]), 19) # average of the list elements self.assertEqual(whisper.aggregate('average', [1, 2, 3, 4]), 2.5) avg_zero = [1, 2, 3, 4, None, None, None, None] non_null = [i for i in avg_zero if i is not None] self.assertEqual(whisper.aggregate('avg_zero', non_null, avg_zero), 1.25) # avg_zero without neighborValues with self.assertRaises(whisper.InvalidAggregationMethod): whisper.aggregate('avg_zero', non_null) # absmax with negative max self.assertEqual(whisper.aggregate('absmax', [-3, -2, 1, 2]), -3) # absmax with positive max self.assertEqual(whisper.aggregate('absmax', [-2, -1, 2, 3]), 3) # absmin with positive min self.assertEqual(whisper.aggregate('absmin', [-3, -2, 1, 2]), 1) # absmin with negative min self.assertEqual(whisper.aggregate('absmin', [-2, -1, 2, 3]), -1) with AssertRaisesException( whisper.InvalidAggregationMethod( 'Unrecognized aggregation method derp')): whisper.aggregate('derp', [12, 2, 3123, 1]) def _test_create_exception(self, exception_method='write', e=None): """ Behaviour when creating a whisper file on a full filesystem """ m_open = mock_open() # Get the mocked file object and override interresting attributes m_file = m_open.return_value m_file.name = self.filename method = getattr(m_file, exception_method) if not e: e = IOError(errno.ENOSPC, "Mocked IOError") method.side_effect = e with patch('whisper.open', m_open, create=True): with patch('os.unlink') as m_unlink: self.assertRaises(e.__class__, whisper.create, self.filename, self.retention) return (m_file, m_unlink) def test_create_write_ENOSPC(self): """ Behaviour when creating a whisper file on a full filesystem (write) """ (m_file, m_unlink) = self._test_create_exception('write') m_unlink.assert_called_with(self.filename) def test_create_close_ENOSPC(self): """ Behaviour when creating a whisper file on a full filesystem (close) """ (m_file, m_unlink) = self._test_create_exception('close') m_unlink.assert_called_with(self.filename) def test_create_close_EIO(self): """ Behaviour when creating a whisper file and getting an I/O error (EIO) """ (m_file, m_unlink) = self._test_create_exception('close', e=IOError(errno.EIO)) self.assertTrue(m_unlink.called) def test_create_close_exception(self): """ Behaviour when creating a whisper file and getting a generic exception """ (m_file, m_unlink) = self._test_create_exception('close', e=Exception("boom!")) # Must not call os.unlink on exception other than IOError self.assertFalse(m_unlink.called) def test_create_and_info(self): """ Create a db and use info() to validate """ # check if invalid configuration fails successfully for retention in (0, []): with AssertRaisesException( whisper.InvalidConfiguration( 'You must specify at least one archive configuration!')): whisper.create(self.filename, retention) # create a new db with a valid configuration whisper.create(self.filename, self.retention) # Ensure another file can't be created when one exists already with AssertRaisesException( whisper.InvalidConfiguration( 'File {0} already exists!'.format(self.filename))): whisper.create(self.filename, self.retention) info = whisper.info(self.filename) # check header information self.assertEqual(info['maxRetention'], max([a[0] * a[1] for a in self.retention])) self.assertEqual(info['aggregationMethod'], 'average') self.assertEqual(info['xFilesFactor'], 0.5) # check archive information self.assertEqual(len(info['archives']), len(self.retention)) self.assertEqual(info['archives'][0]['points'], self.retention[0][1]) 
self.assertEqual(info['archives'][0]['secondsPerPoint'], self.retention[0][0]) self.assertEqual(info['archives'][0]['retention'], self.retention[0][0] * self.retention[0][1]) self.assertEqual(info['archives'][1]['retention'], self.retention[1][0] * self.retention[1][1]) def test_info_bogus_file(self): self.assertIsNone(whisper.info('bogus-file')) # Validate "corrupt" whisper metadata whisper.create(self.filename, self.retention) with SimulatedCorruptWhisperFile(): with AssertRaisesException( whisper.CorruptWhisperFile( 'Unable to read header', self.filename)): whisper.info(self.filename) # Validate "corrupt" whisper archive data with SimulatedCorruptWhisperFile(corrupt_archive=True): with AssertRaisesException( whisper.CorruptWhisperFile( 'Unable to read archive0 metadata', self.filename)): whisper.info(self.filename) def test_file_fetch_edge_cases(self): """ Test some of the edge cases in file_fetch() that should return None or raise an exception """ whisper.create(self.filename, [(1, 60)]) with open(self.filename, 'rb') as fh: msg = "Invalid time interval: from time '{0}' is after until time '{1}'" until_time = 0 from_time = int(time.time()) + 100 with AssertRaisesException( whisper.InvalidTimeInterval(msg.format(from_time, until_time))): whisper.file_fetch(fh, fromTime=from_time, untilTime=until_time) # fromTime > now aka metrics from the future self.assertIsNone( whisper.file_fetch(fh, fromTime=int(time.time()) + 100, untilTime=int(time.time()) + 200), ) # untilTime > oldest time stored in the archive headers = whisper.info(self.filename) the_past = int(time.time()) - headers['maxRetention'] - 200 self.assertIsNone( whisper.file_fetch(fh, fromTime=the_past - 1, untilTime=the_past), ) # untilTime > now, change untilTime to now now = int(time.time()) self.assertEqual( whisper.file_fetch(fh, fromTime=now, untilTime=now + 200, now=now), ((now + 1, now + 2, 1), [None]), ) def test_merge(self): """ test merging two databases """ testdb = "test-%s" % self.filename # Create 2 whisper databases and merge one into the other self._update() self._update(testdb) whisper.merge(self.filename, testdb) def test_merge_empty(self): """ test merging from an empty database """ testdb_a = "test-a-%s" % self.filename testdb_b = "test-b-%s" % self.filename # create two empty databases with same retention self.addCleanup(self._remove, testdb_a) whisper.create(testdb_a, self.retention) self.addCleanup(self._remove, testdb_b) whisper.create(testdb_b, self.retention) whisper.merge(testdb_a, testdb_b) def test_merge_bad_archive_config(self): testdb = "test-%s" % self.filename # Create 2 whisper databases with different schema self._update() self.addCleanup(self._remove, testdb) whisper.create(testdb, [(100, 1)]) with AssertRaisesException( NotImplementedError( 'db.wsp and test-db.wsp archive configurations are ' 'unalike. 
Resize the input before merging')): whisper.merge(self.filename, testdb) def test_diff(self): testdb = "test-%s" % self.filename now = int(time.time()) self.addCleanup(self._remove, testdb) whisper.create(testdb, self.retention) whisper.create(self.filename, self.retention) whisper.update(testdb, 1.0, now) whisper.update(self.filename, 2.0, now) results = whisper.diff(testdb, self.filename) expected = [(0, [(int(now), 1.0, 2.0)], 1), (1, [], 0)] self.assertEqual(results, expected) def test_diff_with_empty(self): testdb = "test-%s" % self.filename now = time.time() self.addCleanup(self._remove, testdb) whisper.create(testdb, self.retention) whisper.create(self.filename, self.retention) whisper.update(testdb, 1.0, now) whisper.update(self.filename, 2.0, now) # Purposefully insert nulls to strip out previous = now - self.retention[0][0] whisper.update(testdb, float('NaN'), previous) results = whisper.diff(testdb, self.filename, ignore_empty=True) self.assertEqual( results, [(0, [(int(now), 1.0, 2.0)], 1), (1, [], 0)], ) results_empties = whisper.diff(testdb, self.filename, ignore_empty=False) expected = [(0, [(int(previous), float('NaN'), None), (int(now), 1.0, 2.0)], 2), (1, [], 0)] # Stupidly, float('NaN') != float('NaN'), so assert that the # repr() results are the same :/ # # See this thread: # https://mail.python.org/pipermail/python-ideas/2010-March/006945.html self.assertEqual( repr(results_empties), repr(expected), ) # Since the above test is somewhat of a sham, ensure that there # is a nan where there should be. self.assertTrue( math.isnan(results_empties[0][1][0][1]) ) def test_file_diff(self): testdb = "test-%s" % self.filename now = time.time() self.addCleanup(self._remove, testdb) whisper.create(testdb, self.retention) whisper.create(self.filename, self.retention) whisper.update(testdb, 1.0, now) whisper.update(self.filename, 2.0, now) # Merging 2 archives with different retentions should fail with open(testdb, 'rb') as fh_1: with open(self.filename, 'rb+') as fh_2: results = whisper.file_diff(fh_1, fh_2) expected = [(0, [(int(now), 1.0, 2.0)], 1), (1, [], 0)] self.assertEqual(results, expected) def test_file_diff_invalid(self): testdb = "test-%s" % self.filename self.addCleanup(self._remove, testdb) whisper.create(testdb, [(120, 10)]) whisper.create(self.filename, self.retention) # Merging 2 archives with different retentions should fail with open(testdb, 'rb') as fh_1: with open(self.filename, 'rb+') as fh_2: with AssertRaisesException( NotImplementedError( 'test-db.wsp and db.wsp archive configurations are ' 'unalike. Resize the input before diffing')): whisper.file_diff(fh_1, fh_2) def test_fetch(self): """ fetch info from database """ # Don't use AssertRaisesException due to a super obscure bug in # python2.6 which returns an IOError in the 2nd argument of __exit__ # in a context manager as a tuple. 
See this for a minimal reproducer: # http://git.io/cKz30g with self.assertRaises(IOError): # check a db that doesnt exist whisper.fetch("this_db_does_not_exist", 0) # SECOND MINUTE HOUR DAY retention = [(1, 60), (60, 60), (3600, 24), (86400, 365)] whisper.create(self.filename, retention) # check a db with an invalid time range now = int(time.time()) past = now - 6000 msg = "Invalid time interval: from time '{0}' is after until time '{1}'" with AssertRaisesException(whisper.InvalidTimeInterval(msg.format(now, past))): whisper.fetch(self.filename, now, past) fetch = whisper.fetch(self.filename, 0) # check time range self.assertEqual(fetch[0][1] - fetch[0][0], retention[-1][0] * retention[-1][1]) # check number of points self.assertEqual(len(fetch[1]), retention[-1][1]) # check step size self.assertEqual(fetch[0][2], retention[-1][0]) def _update(self, wsp=None, schema=None, sparse=False, useFallocate=False): wsp = wsp or self.filename schema = schema or [(1, 20)] num_data_points = 20 # create sample data self.addCleanup(self._remove, wsp) whisper.create(wsp, schema, sparse=sparse, useFallocate=useFallocate) tn = int(time.time()) - num_data_points data = [] for i in range(num_data_points): data.append((tn + 1 + i, random.random() * 10)) # test single update whisper.update(wsp, data[0][1], data[0][0]) # test multi update whisper.update_many(wsp, data[1:]) return data def test_fadvise(self): original_fadvise = whisper.FADVISE_RANDOM whisper.FADVISE_RANDOM = True self._update() whisper.FADVISE_RANDOM = original_fadvise def test_lock(self): original_lock = whisper.LOCK whisper.LOCK = True self._update() whisper.LOCK = original_lock def test_autoflush(self): original_autoflush = whisper.AUTOFLUSH whisper.AUTOFLUSH = True self._update() whisper.AUTOFLUSH = original_autoflush def test_fallocate(self): self._update(useFallocate=True) def test_sparse(self): self._update(sparse=True) def test_set_xfilesfactor(self): """ Create a whisper file Update xFilesFactor Check if update succeeded Check if exceptions get raised with wrong input """ whisper.create(self.filename, [(1, 20)]) target_xff = 0.42 info0 = whisper.info(self.filename) old_xff = whisper.setXFilesFactor(self.filename, target_xff) # return value should match old xff self.assertEqual(info0['xFilesFactor'], old_xff) info1 = whisper.info(self.filename) # Other header information should not change self.assertEqual(info0['aggregationMethod'], info1['aggregationMethod']) self.assertEqual(info0['maxRetention'], info1['maxRetention']) self.assertEqual(info0['archives'], info1['archives']) # packing and unpacking because # AssertionError: 0.20000000298023224 != 0.2 target_xff = struct.unpack("!f", struct.pack("!f", target_xff))[0] self.assertEqual(info1['xFilesFactor'], target_xff) with AssertRaisesException( whisper.InvalidXFilesFactor('Invalid xFilesFactor zero, not a ' 'float')): whisper.setXFilesFactor(self.filename, "zero") for invalid_xff in -1, 2: with AssertRaisesException( whisper.InvalidXFilesFactor('Invalid xFilesFactor %s, not ' 'between 0 and 1' % float(invalid_xff))): whisper.setXFilesFactor(self.filename, invalid_xff) def test_update_single_archive(self): """ Update with a single leveled archive """ retention_schema = [(1, 20)] data = self._update(schema=retention_schema) # fetch the data fetch = whisper.fetch(self.filename, 0) # all data fetch_data = fetch[1] for i, (timestamp, value) in enumerate(data): # is value in the fetched data? 
self.assertEqual(value, fetch_data[i]) # check TimestampNotCovered with AssertRaisesException( whisper.TimestampNotCovered( 'Timestamp not covered by any archives in this database.')): # in the futur whisper.update(self.filename, 1.337, time.time() + 1) with AssertRaisesException( whisper.TimestampNotCovered( 'Timestamp not covered by any archives in this database.')): # before the past whisper.update(self.filename, 1.337, time.time() - retention_schema[0][1] - 1) # When no timestamp is passed in, it should use the current time original_lock = whisper.LOCK whisper.LOCK = True whisper.update(self.filename, 3.7337, None) fetched = whisper.fetch(self.filename, 0)[1] self.assertEqual(fetched[-1], 3.7337) whisper.LOCK = original_lock def test_update_many_excess(self): # given an empty db wsp = "test_update_many_excess.wsp" self.addCleanup(self._remove, wsp) archive_len = 3 archive_step = 1 whisper.create(wsp, [(archive_step, archive_len)]) # given too many points than the db can hold excess_len = 1 num_input_points = archive_len + excess_len test_now = int(time.time()) input_start = test_now - num_input_points + archive_step input_points = [(input_start + i, random.random() * 10) for i in range(num_input_points)] # when the db is updated with too many points whisper.update_many(wsp, input_points, now=test_now) # then only the most recent input points (those at the end) were written actual_time_info = whisper.fetch(wsp, 0, now=test_now)[0] self.assertEqual(actual_time_info, (input_points[-archive_len][0], input_points[-1][0] + archive_step, # untilInterval = newest + step archive_step)) def test_debug(self): """ Test creating a file with debug enabled Should print debug messages to stdout """ # debug prints to stdout, redirect it to a variable old_stdout = sys.stdout sys.stdout = StringIO() whisper.disableDebug() whisper.enableDebug() self._update() whisper.disableDebug() sys.stdout.seek(0) out = sys.stdout.read() sys.stdout = old_stdout assertRegex(self, out, '(DEBUG :: (WRITE|READ) \d+ bytes #\d+\n)+') # TODO: This test method takes more time than virtually every # single other test combined. 
Profile this code and potentially # fix the underlying reason def test_setAggregation(self): """ Create a db, change aggregation, xFilesFactor, then use info() to validate """ original_lock = whisper.LOCK original_caching = whisper.CACHE_HEADERS original_autoflush = whisper.AUTOFLUSH whisper.LOCK = True whisper.AUTOFLUSH = True whisper.CACHE_HEADERS = True # create a new db with a valid configuration whisper.create(self.filename, self.retention) with AssertRaisesException( whisper.InvalidAggregationMethod( 'Unrecognized aggregation method: yummy beer')): whisper.setAggregationMethod(self.filename, 'yummy beer') # set setting every AggregationMethod available for ag in whisper.aggregationMethods: for xff in 0.0, 0.2, 0.4, 0.7, 0.75, 1.0: # original xFilesFactor info0 = whisper.info(self.filename) # optional xFilesFactor not passed old_ag = whisper.setAggregationMethod(self.filename, ag) # should return old aggregationmethod self.assertEqual(old_ag, info0['aggregationMethod']) # original value should not change info1 = whisper.info(self.filename) self.assertEqual(info0['xFilesFactor'], info1['xFilesFactor']) # the selected aggregation method should have applied self.assertEqual(ag, info1['aggregationMethod']) # optional xFilesFactor used old_ag = whisper.setAggregationMethod(self.filename, ag, xff) # should return old aggregationmethod self.assertEqual(old_ag, info1['aggregationMethod']) # new info should match what we just set it to info2 = whisper.info(self.filename) # packing and unpacking because # AssertionError: 0.20000000298023224 != 0.2 target_xff = struct.unpack("!f", struct.pack("!f", xff))[0] self.assertEqual(info2['xFilesFactor'], target_xff) # same aggregationMethod assertion again, but double-checking since # we are playing with packed values and seek() self.assertEqual(ag, info2['aggregationMethod']) with SimulatedCorruptWhisperFile(): with AssertRaisesException( whisper.CorruptWhisperFile( 'Unable to read header', self.filename)): whisper.setAggregationMethod(self.filename, ag) whisper.LOCK = original_lock whisper.AUTOFLUSH = original_autoflush whisper.CACHE_HEADERS = original_caching def test_fetch_with_archive_to_select(self): """ fetch info from database providing the archive to select """ # SECOND MINUTE HOUR DAY retention = [(1, 60), (60, 60), (3600, 24), (86400, 365)] whisper.create(self.filename, retention) archives = ["1s", "1m", "1h", "1d"] for i in range(len(archives)): fetch = whisper.fetch(self.filename, 0, archiveToSelect=archives[i]) self.assertEqual(fetch[0][2], retention[i][0]) # check time range self.assertEqual(fetch[0][1] - fetch[0][0], retention[-1][0] * retention[-1][1]) with AssertRaisesException(ValueError("Invalid granularity: 2")): fetch = whisper.fetch(self.filename, 0, archiveToSelect="2s") class TestgetUnitString(unittest.TestCase): def test_function(self): for unit in ('seconds', 'minutes', 'hours', 'days', 'weeks'): value = whisper.getUnitString(unit[0]) self.assertEqual(value, unit) def test_invalid_unit(self): with AssertRaisesException(ValueError("Invalid unit 'z'")): whisper.getUnitString('z') # If you send an invalid file, this deadlocks my Fedora 21 / Linux 3.17 laptop # TODO: Find a way to pass in corrupt whisper files that don't deadlock the testing box class TestReadHeader(WhisperTestBase): def test_normal(self): whisper.create(self.filename, [(1, 60), (60, 60)]) whisper.CACHE_HEADERS = True whisper.info(self.filename) whisper.info(self.filename) whisper.CACHE_HEADERS = False class TestParseRetentionDef(unittest.TestCase): def 
test_valid_retentions(self): retention_map = ( ('60:10', (60, 10)), ('10:60', (10, 60)), ('10s:10h', (10, 3600)), ) for retention, expected in retention_map: results = whisper.parseRetentionDef(retention) self.assertEqual(results, expected) def test_invalid_retentions(self): retention_map = ( # From getUnitString ('10x:10', ValueError("Invalid unit 'x'")), ('60:10x', ValueError("Invalid unit 'x'")), # From parseRetentionDef ('10', ValueError("Invalid retention definition '10'")), ('10X:10', ValueError("Invalid precision specification '10X'")), ('10:10$', ValueError("Invalid retention specification '10$'")), ('60:10', (60, 10)), ) for retention, expected_exc in retention_map: try: results = whisper.parseRetentionDef(retention) except expected_exc.__class__ as exc: self.assertEqual( str(expected_exc), str(exc), ) self.assertEqual( expected_exc.__class__, exc.__class__, ) else: # When there isn't an exception raised self.assertEqual(results, expected_exc) class TestCorruptWhisperFile(unittest.TestCase): def setUp(self): self.path = '/opt/graphite/storage/whisper/moolah.wsp' self.error = 'What is the average velocity of an unladen swallow?' def test_error(self): try: raise whisper.CorruptWhisperFile(self.error, self.path) except whisper.CorruptWhisperFile as exc: self.assertEqual(exc.error, self.error) def test_path(self): try: raise whisper.CorruptWhisperFile(self.error, self.path) except whisper.CorruptWhisperFile as exc: self.assertEqual(exc.path, self.path) def test_repr(self): try: raise whisper.CorruptWhisperFile(self.error, self.path) except whisper.CorruptWhisperFile as exc: self.assertEqual( repr(exc), '<CorruptWhisperFile[%s] %s>' % (self.path, self.error), ) def test_str(self): try: raise whisper.CorruptWhisperFile(self.error, self.path) except whisper.CorruptWhisperFile as exc: self.assertEqual( str(exc), "{0} ({1})".format(self.error, self.path) ) if __name__ == '__main__': unittest.main() whisper-1.1.4/contrib/0000755000000000000000000000000013343335427014603 5ustar rootroot00000000000000whisper-1.1.4/contrib/whisper-auto-resize.py0000755000000000000000000002136213343334675021116 0ustar rootroot00000000000000#!/usr/bin/env python import sys import os import fnmatch import shlex from subprocess import call from optparse import OptionParser from distutils.spawn import find_executable from os.path import basename from six.moves import input # On Debian systems whisper-resize.py is available as whisper-resize whisperResizeExecutable = find_executable("whisper-resize.py") if whisperResizeExecutable is None: whisperResizeExecutable = find_executable("whisper-resize") if whisperResizeExecutable is None: # Probably will fail later, set it nevertheless whisperResizeExecutable = "whisper-resize.py" option_parser = OptionParser( usage='''%prog storagePath configPath storagePath the Path to the directory containing whisper files (CAN NOT BE A SUBDIR, use --subdir for that) configPath the path to your carbon config files ''', version="%prog 0.1") option_parser.add_option( '--doit', default=False, action='store_true', help="This is not a drill, let's do it") option_parser.add_option( '-q', '--quiet', default=False, action='store_true', help="Suppress informational output") option_parser.add_option( '--subdir', default=None, type='string', help="only process a subdir of whisper files") option_parser.add_option( '--carbonlib', default=None, type='string', help="folder where the carbon lib files are if it's not in your path already") option_parser.add_option( '--whisperlib', default=None, type='string', help="folder where the
whisper lib files are if it's not in your path already") option_parser.add_option( '--confirm', default=False, action='store_true', help="ask for confirmation prior to resizing a whisper file") option_parser.add_option( '-x', '--extra_args', default='', type='string', help="pass any additional arguments to the %s script" % basename(whisperResizeExecutable)) (options, args) = option_parser.parse_args() if len(args) < 2: option_parser.print_help() sys.exit(1) storagePath = args[0] configPath = args[1] # check to see if we are processing a subfolder # we need to have a separate config option for this since # otherwise the metric test thinks the metric is at the root # of the storage path and can match schemas incorrectly if options.subdir is None: processPath = args[0] else: processPath = options.subdir # Injecting the Whisper Lib Path if needed if options.whisperlib is not None: sys.path.insert(0, options.whisperlib) try: import whisper except ImportError: raise SystemExit('[ERROR] Can\'t find the whisper module, try using ' '--whisperlib to explicitly include the path') # Injecting the Carbon Lib Path if needed if options.carbonlib is not None: sys.path.insert(0, options.carbonlib) try: from carbon.conf import settings except ImportError: raise SystemExit('[ERROR] Can\'t find the carbon module, try using ' '--carbonlib to explicitly include the path') # carbon.conf not seeing the config files so give it a nudge settings.CONF_DIR = configPath settings.LOCAL_DATA_DIR = storagePath # import these once we have the settings figured out from carbon.storage import loadStorageSchemas, loadAggregationSchemas # Load the Defined Schemas from our config files schemas = loadStorageSchemas() agg_schemas = loadAggregationSchemas() # check to see if a metric needs to be resized based on the current config def processMetric(fullPath, schemas, agg_schemas): """ method to process a given metric, and resize it if necessary Parameters: fullPath - full path to the metric whisper file schemas - carbon storage schemas loaded from config agg_schemas - carbon storage aggregation schemas loaded from config """ schema_config_args = '' schema_file_args = '' rebuild = False messages = '' # get archive info from whisper file info = whisper.info(fullPath) # get graphite metric name from fullPath metric = getMetricFromPath(fullPath) # loop the carbon-storage schemas for schema in schemas: if schema.matches(metric): # returns secondsPerPoint and points for this schema in tuple format archive_config = [archive.getTuple() for archive in schema.archives] break # loop through the carbon-aggregation schemas for agg_schema in agg_schemas: if agg_schema.matches(metric): xFilesFactor, aggregationMethod = agg_schema.archives break if xFilesFactor is None: xFilesFactor = 0.5 if aggregationMethod is None: aggregationMethod = 'average' # loop through the bucket tuples and convert to string format for resizing for retention in archive_config: current_schema = '%s:%s ' % (retention[0], retention[1]) schema_config_args += current_schema # loop through the current files bucket sizes and convert to string format # to compare for resizing for fileRetention in info['archives']: current_schema = '%s:%s ' % (fileRetention['secondsPerPoint'], fileRetention['points']) schema_file_args += current_schema # check to see if the current and configured schemas are the same or rebuild if (schema_config_args != schema_file_args): rebuild = True messages += 'updating Retentions from: %s to: %s \n' % \ (schema_file_args, schema_config_args) # only care about
the first two decimals in the comparison since there is # floaty stuff going on. info_xFilesFactor = "{0:.2f}".format(info['xFilesFactor']) str_xFilesFactor = "{0:.2f}".format(xFilesFactor) # check to see if the current and configured xFilesFactor are the same if (str_xFilesFactor != info_xFilesFactor): rebuild = True messages += '%s xFilesFactor differs real: %s should be: %s \n' % \ (metric, info_xFilesFactor, str_xFilesFactor) # check to see if the current and configured aggregationMethods are the same if (aggregationMethod != info['aggregationMethod']): rebuild = True messages += '%s aggregation schema differs real: %s should be: %s \n' % \ (metric, info['aggregationMethod'], aggregationMethod) # if we need to rebuild, lets do it. if rebuild is True: cmd = [whisperResizeExecutable, fullPath] for x in shlex.split(options.extra_args): cmd.append(x) cmd.append('--xFilesFactor=' + str(xFilesFactor)) cmd.append('--aggregationMethod=' + str(aggregationMethod)) for x in shlex.split(schema_config_args): cmd.append(x) if options.quiet is not True or options.confirm is True: print(messages) print(cmd) if options.confirm is True: options.doit = confirm("Would you like to run this command? [y/n]: ") if options.doit is False: print("Skipping command \n") if options.doit is True: exitcode = call(cmd) # if the command failed lets bail so we can take a look before proceeding if (exitcode > 0): print('Error running: %s' % (cmd)) sys.exit(1) def getMetricFromPath(filePath): """ this method takes the full file path of a whisper file an converts it to a gaphite metric name Parameters: filePath - full file path to a whisper file Returns a string representing the metric name """ # sanitize directory since we may get a trailing slash or not, and if we # don't it creates a leading '.' data_dir = os.path.normpath(settings.LOCAL_DATA_DIR) + os.sep # pull the data dir off and convert to the graphite metric name metric_name = filePath.replace(data_dir, '') metric_name = metric_name.replace('.wsp', '') metric_name = metric_name.replace('/', '.') return metric_name def confirm(question, error_response='Valid options : yes or no'): """ ask the user if they would like to perform the action Parameters: question - the question you would like to ask the user to confirm. error_response - the message to display if an invalid option is given. """ while True: answer = input(question).lower() if answer in ('y', 'yes'): return True if answer in ('n', 'no'): return False print(error_response) if os.path.isfile(processPath) and processPath.endswith('.wsp'): processMetric(processPath, schemas, agg_schemas) else: for root, _, files in os.walk(processPath): # we only want to deal with non-hidden whisper files for f in fnmatch.filter(files, '*.wsp'): fullpath = os.path.join(root, f) processMetric(fullpath, schemas, agg_schemas) whisper-1.1.4/contrib/whisper-auto-update.py0000755000000000000000000000375613343334675021106 0ustar rootroot00000000000000#!/usr/bin/env python import sys import time import signal import optparse try: import whisper except ImportError: raise SystemExit('[ERROR] Please make sure whisper is installed properly') # update this callback to do the logic you want. # a future version could use a config while in which this fn is defined. 
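# (Illustrative note, not part of the original script: any callable with this
#  (timestamp, value) signature can be plugged in here. For example, returning
#  value / (1024 ** 3) for non-None values would scale bytes back down to
#  gigabytes; returning None skips writing that datapoint.)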
def update_value(timestamp, value): if value is None: return value return value * 1024 * 1024 * 1024 # Ignore SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) now = int(time.time()) yesterday = now - (60 * 60 * 24) option_parser = optparse.OptionParser(usage='''%prog [options] path''') option_parser.add_option( '--from', default=yesterday, type='int', dest='_from', help=("Unix epoch time of the beginning of " "your requested interval (default: 24 hours ago)")) option_parser.add_option( '--until', default=now, type='int', help="Unix epoch time of the end of your requested interval (default: now)") option_parser.add_option( '--pretty', default=False, action='store_true', help="Show human-readable timestamps instead of unix times") (options, args) = option_parser.parse_args() if len(args) < 1: option_parser.print_usage() sys.exit(1) path = args[0] from_time = int(options._from) until_time = int(options.until) try: data = whisper.fetch(path, from_time, until_time) if not data: raise SystemExit('No data in selected timerange') (timeInfo, values_old) = data except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) (start, end, step) = timeInfo t = start for value_old in values_old: value_str_old = str(value_old) value_new = update_value(t, value_old) value_str_new = str(value_new) if options.pretty: timestr = time.ctime(t) else: timestr = str(t) print("%s\t%s -> %s" % (timestr, value_str_old, value_str_new)) try: if value_new is not None: whisper.update(path, value_new, t) t += step except whisper.WhisperException as exc: raise SystemExit('[ERROR] %s' % str(exc)) whisper-1.1.4/contrib/update-storage-times.py0000755000000000000000000001465413343334675021241 0ustar rootroot00000000000000#!/usr/bin/env python # @package update_storage_schemas.py # Correct/Update storage schemas\n # @code # # Usage example for update_storage_schemas.py # sudo ./update_storage_schemas.py --path /opt/graphite/whisper --cfg /opt/graphite/conf/schemas # @endcode import sys import os import logging import subprocess import argparse import re import time from multiprocessing import Pool, cpu_count from configobj import ConfigObj # Assuming Python 2, we'll want scandir if possible, it's much faster try: from scandir import scandir except ImportError: from os import listdir as scandir RESIZE_BIN = "/opt/graphite/bin/whisper-resize.py" INFO_BIN = "/opt/graphite/bin/whisper-info.py" LOG = logging.getLogger() LOG.setLevel(logging.INFO) SCHEMA_LIST = {} # The very basic default retentions DEFAULT_SCHEMA = {'match': re.compile('.*'), 'retentions': '1m:7d'} DEBUG = False DRY_RUN = False BASE_COMMAND = [RESIZE_BIN] ROOT_PATH = "" def config_schemas(cfg): schema_conf = ConfigObj(cfg) for schema in schema_conf.items(): item = schema[1]['pattern'] if item == '.*': DEFAULT_SCHEMA['retentions'] = schema[1]['retentions'] else: if item[0] == '^': item = item[1:] SCHEMA_LIST[item] = {'retentions': schema[1]['retentions'], 'match': re.compile(item)} def _convert_seconds(time): seconds_dict = {'s': 1, 'm': 60, 'h': 3600, 'min': 60, 'd': 86400, 'w': 604800, 'y': 31536000} (points, time) = time.split(':') if str.isalpha(time[-1]): time = int(time[:-1]) * seconds_dict[time[-1]] return time def _compare_retention(retention, tmp_path): # Get the new retention as [(secondsPerPoint, numPoints), ...] 
new_retention = [_convert_seconds(item) for item in list(retention)] info_string = [INFO_BIN, tmp_path] cur_ret_list = subprocess.Popen(info_string, stdout=subprocess.PIPE) cur_ret_list = cur_ret_list.communicate()[0].split('\n') cur_retention = [int(line.split(':')[1]) for line in cur_ret_list if 'retention' in line] return cur_retention == new_retention def _find_metrics(path): for f in scandir(path): if f.is_dir(follow_symlinks=False): for sf in _find_metrics(f.path): yield sf else: if not f.is_file(follow_symlinks=False) or \ not f.name.endswith('.wsp'): continue yield f.path def fix_metric(metric): if not SCHEMA_LIST: LOG.error("Didn't initialize schemas!") return [] if DEBUG: LOG.info("Testing %s for modification" % metric) devnull = open(os.devnull, 'w') command_string = list(BASE_COMMAND) + [metric] retention = DEFAULT_SCHEMA['retentions'] matching = metric[len(ROOT_PATH):] for schema, info in SCHEMA_LIST.iteritems(): if info['match'].search(matching): retention = info['retentions'] break command_string.extend(list(retention)) if DEBUG: LOG.info("Created command: %s" % command_string) if _compare_retention(retention, metric): LOG.debug('%s has the same retention as before!' % metric) return [(False, metric)] if DRY_RUN: res = 0 else: LOG.debug('Retention will be %s' % retention) if DEBUG: res = subprocess.check_call(command_string) else: res = subprocess.check_call(command_string, stdout=devnull) devnull.close() # wait for a second, so we don't kill I/O on the host time.sleep(0.3) """ We have manual commands for every failed file from these errors, so we can just go through each of these errors after a completed run. There shouldn't be many """ if res != 0: LOG.error('Failed to update schemas for %s' % metric) LOG.error('Attempted retention: %s' % retention) LOG.error('Attempted command string: %s' % command_string) return [(False, metric)] else: return [(True, metric)] def search_and_fix(subdir): if not SCHEMA_LIST: LOG.error("Didn't initialize schemas!") return fpath = os.path.join(ROOT_PATH, subdir) pool = Pool(cpu_count()) LOG.info('Creating new storage schemas for metrics under %s ...' 
% fpath) results = pool.map(fix_metric, _find_metrics(fpath), 100) pool.close() pool.join() return results # Parse command line options sent to the script def cli_opts(): parser = argparse.ArgumentParser("Correct storage settings on multiple whisper files") parser.add_argument('--cfg', action='store', dest='cfg', help='The storage-schemas.conf file path', required=True) parser.add_argument('--path', action='store', dest='path', help='The root path to find metrics in', required=True) parser.add_argument('--debug', action='store_true', dest='debug', help='Display debug information', default=False) parser.add_argument('--dry-run', action='store_true', dest='dry_run', help="Don't actually do anything", default=False) parser.add_argument('--subdir', action='store', dest='subdir', help="If you only want to process a particular subdir", default='') parser.add_argument('--nobackup', action='store_true', dest='nobackup', help="Passed through to whisper-resize.py, don't create a backup", default=False) parser.add_argument('--aggregate', action='store_true', dest='aggregate', help="Passed through to whisper-resize.py, roll up values", default=False) return parser.parse_args() if __name__ == '__main__': i_args = cli_opts() if os.getenv('USER') != 'root': print("You must run this script as root!") sys.exit(1) if i_args.debug: LOG.setLevel(logging.DEBUG) soh = logging.StreamHandler(sys.stdout) LOG.addHandler(soh) ROOT_PATH = i_args.path DEBUG = i_args.debug DRY_RUN = i_args.dry_run if i_args.nobackup: BASE_COMMAND.append('--nobackup') if i_args.aggregate: BASE_COMMAND.append('--aggregate') config_schemas(i_args.cfg) search_and_fix(i_args.subdir)