whisper-1.1.10/LICENSE

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.

"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:

(a) You must give any other recipients of the Work or Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.

You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.

5. Submission of Contributions.
Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

whisper-1.1.10/PKG-INFO

Metadata-Version: 2.1
Name: whisper
Version: 1.1.10
Summary: Fixed size round-robin style database
Home-page: http://graphiteapp.org/
Author: Chris Davis
Author-email: chrismd@gmail.com
License: Apache Software License 2.0
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Description-Content-Type: text/markdown
License-File: LICENSE
whisper-1.1.10/README.md

# Whisper

[![Codacy Badge](https://api.codacy.com/project/badge/Grade/f00d0b65802742e29de56f3744503ab0)](https://www.codacy.com/app/graphite-project/whisper?utm_source=github.com&utm_medium=referral&utm_content=graphite-project/whisper&utm_campaign=badger)
[![Build Status](https://secure.travis-ci.org/graphite-project/whisper.png)](http://travis-ci.org/graphite-project/whisper)
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bhttps%3A%2F%2Fgithub.com%2Fgraphite-project%2Fwhisper.svg?type=shield)](https://app.fossa.io/projects/git%2Bhttps%3A%2F%2Fgithub.com%2Fgraphite-project%2Fwhisper?ref=badge_shield)

## Overview

Whisper is one of three components within the Graphite project:

1. [Graphite-Web](https://github.com/graphite-project/graphite-web), a Django-based web application that renders graphs and dashboards
2. The [Carbon](https://github.com/graphite-project/carbon) metric processing daemons
3. The Whisper time-series database library

![Graphite Components](https://github.com/graphite-project/graphite-web/raw/master/webapp/content/img/overview.png "Graphite Components")

Whisper is a fixed-size database, similar in design and purpose to RRD (round-robin database). It provides fast, reliable storage of numeric data over time. Whisper allows recent data stored at a high resolution (seconds per point) to degrade into lower resolutions for long-term retention of historical data.
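For orientation, here is a minimal sketch of the underlying library API — `whisper.create`, `whisper.update` and `whisper.fetch` — which the scripts below wrap. The file name and retention values are arbitrary examples:

```
import time
import whisper

path = 'example.wsp'  # arbitrary example path

# Two archives: 60-second resolution kept for a day,
# then 1-hour resolution kept for a week.
whisper.create(path, [(60, 1440), (3600, 168)],
               xFilesFactor=0.5, aggregationMethod='average')

now = int(time.time())
whisper.update(path, 42.0, now)  # store one datapoint at the current time

# fetch returns ((start, end, step), values); values may contain None
(start, end, step), values = whisper.fetch(path, now - 3600)
for timestamp, value in zip(range(start, end, step), values):
    print(timestamp, value)
```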
## Installation, Configuration and Usage

Please refer to the instructions at [readthedocs](http://graphite.readthedocs.org/).

## Whisper Scripts

rrd2whisper.py
--------------
Convert an RRD file into a whisper (.wsp) file.

```
Usage: rrd2whisper.py rrd_path

Options:
  -h, --help            show this help message and exit
  --xFilesFactor=XFILESFACTOR
                        The xFilesFactor to use in the output file. Defaults
                        to the input RRD's xFilesFactor
  --aggregationMethod=AGGREGATIONMETHOD
                        The consolidation function to fetch from on input and
                        aggregationMethod to set on output. One of: average,
                        last, max, min, avg_zero, absmax, absmin
  --destinationPath=DESTINATIONPATH
                        Path to place created whisper file. Defaults to the
                        RRD file's source path.
```

whisper-create.py
-----------------
Create a new whisper database file.

```
Usage: whisper-create.py path timePerPoint:timeToStore [timePerPoint:timeToStore]*
       whisper-create.py --estimate timePerPoint:timeToStore [timePerPoint:timeToStore]*

timePerPoint and timeToStore specify lengths of time, for example:

60:1440      60 seconds per datapoint, 1440 datapoints = 1 day of retention
15m:8        15 minutes per datapoint, 8 datapoints = 2 hours of retention
1h:7d        1 hour per datapoint, 7 days of retention
12h:2y       12 hours per datapoint, 2 years of retention

Options:
  -h, --help            show this help message and exit
  --xFilesFactor=XFILESFACTOR
  --aggregationMethod=AGGREGATIONMETHOD
                        Function to use when aggregating values (average,
                        sum, last, max, min, avg_zero, absmax, absmin)
  --overwrite
  --estimate            Don't create a whisper file, estimate storage
                        requirements based on archive definitions
```

whisper-dump.py
---------------
Dump the whole whisper file content to stdout.

```
Usage: whisper-dump.py path

Options:
  -h, --help            show this help message and exit
  --pretty              Show human-readable timestamps instead of unix times
  -t TIME_FORMAT, --time-format=TIME_FORMAT
                        Time format to use with --pretty; see time.strftime()
  -r, --raw             Dump value only in the same format for whisper-update
                        (UTC timestamps)
```

whisper-fetch.py
----------------
Fetch all the metrics stored in a whisper file to stdout.

```
Usage: whisper-fetch.py [options] path

Options:
  -h, --help            show this help message and exit
  --from=_FROM          Unix epoch time of the beginning of your requested
                        interval (default: 24 hours ago)
  --until=UNTIL         Unix epoch time of the end of your requested interval
                        (default: now)
  --json                Output results in JSON form
  --pretty              Show human-readable timestamps instead of unix times
  -t TIME_FORMAT, --time-format=TIME_FORMAT
                        Time format to use with --pretty; see time.strftime()
  --drop=DROP           Specify 'nulls' to drop all null values. Specify
                        'zeroes' to drop all zero values. Specify 'empty' to
                        drop both null and zero values.
```

whisper-info.py
---------------
Dump the metadata about a whisper file to stdout.

```
Usage: whisper-info.py [options] path [field]

Options:
  -h, --help  show this help message and exit
  --json      Output results in JSON form
```

whisper-merge.py
----------------
Join two existing whisper files together.

```
Usage: whisper-merge.py [options] from_path to_path

Options:
  -h, --help  show this help message and exit
```

whisper-fill.py
----------------
Copies data from src into dst where it is missing. Unlike whisper-merge, it does not overwrite data that is already present in the target file; it only adds the missing data (i.e. it fills the gaps in the target file). Because no values are overwritten, no data or precision is lost. Also, unlike whisper-merge, it tries to take the data from the highest-precision archive instead of the one with the largest retention.

```
Usage: whisper-fill.py [options] src_path dst_path

Options:
  -h, --help  show this help message and exit
```

whisper-resize.py
-----------------
Change the retention rates of an existing whisper file.

```
Usage: whisper-resize.py path timePerPoint:timeToStore [timePerPoint:timeToStore]*

timePerPoint and timeToStore specify lengths of time, for example:

60:1440      60 seconds per datapoint, 1440 datapoints = 1 day of retention
15m:8        15 minutes per datapoint, 8 datapoints = 2 hours of retention
1h:7d        1 hour per datapoint, 7 days of retention
12h:2y       12 hours per datapoint, 2 years of retention

Options:
  -h, --help            show this help message and exit
  --xFilesFactor=XFILESFACTOR
                        Change the xFilesFactor
  --aggregationMethod=AGGREGATIONMETHOD
                        Change the aggregation function (average, sum, last,
                        max, min, avg_zero, absmax, absmin)
  --force               Perform a destructive change
  --newfile=NEWFILE     Create a new database file without removing the
                        existing one
  --nobackup            Delete the .bak file after successful execution
  --aggregate           Try to aggregate the values to fit the new archive
                        better. Note that this will make things slower and
                        use more memory.
```

whisper-set-aggregation-method.py
---------------------------------
Change the aggregation method of an existing whisper file.

```
Usage: whisper-set-aggregation-method.py path

Options:
  -h, --help  show this help message and exit
```

whisper-update.py
-----------------
Update a whisper file with one or many values. A timestamp must be provided with each value.

```
Usage: whisper-update.py [options] path timestamp:value [timestamp:value]*

Options:
  -h, --help  show this help message and exit
```

whisper-diff.py
---------------
Check the differences between whisper files. Useful as a sanity check before merging.

```
Usage: whisper-diff.py [options] path_a path_b

Options:
  -h, --help      show this help message and exit
  --summary       show summary of differences
  --ignore-empty  skip comparison if either value is undefined
  --columns       print output in simple columns
  --no-headers    do not print column headers
  --until=UNTIL   Unix epoch time of the end of your requested interval
                  (default: now)
  --json          Output results in JSON form
```

## License

Whisper is licensed under version 2.0 of the Apache License. See the [LICENSE](https://github.com/graphite-project/whisper/blob/master/LICENSE) file for details.
whisper-1.1.10/bin/find-corrupt-whisper-files.py

#!/usr/bin/env python
# encoding: utf-8
"""Find and (optionally) delete corrupt Whisper data files"""
from __future__ import absolute_import, print_function, unicode_literals

import argparse
import os
import sys

import whisper


def walk_dir(base_dir, delete_corrupt=False, verbose=False):
  for dirpath, dirnames, filenames in os.walk(base_dir):
    if verbose:
      print("Scanning %s…" % dirpath)

    whisper_files = (os.path.join(dirpath, i) for i in filenames
                     if i.endswith('.wsp'))

    for f in whisper_files:
      try:
        info = whisper.info(f)
      except whisper.CorruptWhisperFile:
        if delete_corrupt:
          print('Deleting corrupt Whisper file: %s' % f, file=sys.stderr)
          os.unlink(f)
        else:
          print('Corrupt Whisper file: %s' % f, file=sys.stderr)
        continue

      if verbose:
        print('%s: %d points' % (f, sum(i['points']
                                        for i in info.get('archives', {}))))


if __name__ == "__main__":
  parser = argparse.ArgumentParser(description=__doc__.strip())
  parser.add_argument('--delete-corrupt', default=False, action='store_true',
                      help='Delete reported files')
  parser.add_argument('--verbose', default=False, action='store_true',
                      help='Display progress info')
  parser.add_argument('directories', type=str, nargs='+',
                      metavar='WHISPER_DIR',
                      help='Directory containing Whisper files')
  args = parser.parse_args()

  for d in args.directories:
    d = os.path.realpath(d)
    if not os.path.isdir(d):
      parser.error("%s is not a directory!" % d)
    walk_dir(d, delete_corrupt=args.delete_corrupt, verbose=args.verbose)
whisper-1.1.10/bin/rrd2whisper.py

#!/usr/bin/env python

import errno
import os
import sys
import time
import signal
import optparse

try:
  import rrdtool
except ImportError as exc:
  raise SystemExit('[ERROR] Missing dependency: %s' % str(exc))

try:
  import whisper
except ImportError:
  raise SystemExit('[ERROR] Please make sure whisper is installed properly')

# Ignore SIGPIPE
signal.signal(signal.SIGPIPE, signal.SIG_DFL)

aggregationMethods = list(whisper.aggregationMethods)

# RRD doesn't have a 'sum' or 'total' type
aggregationMethods.remove('sum')
# RRD doesn't have a 'absmax' type
aggregationMethods.remove('absmax')
# RRD doesn't have a 'absmin' type
aggregationMethods.remove('absmin')

option_parser = optparse.OptionParser(usage='''%prog rrd_path''')
option_parser.add_option(
  '--xFilesFactor',
  help="The xFilesFactor to use in the output file. " +
       "Defaults to the input RRD's xFilesFactor",
  default=None,
  type='float')
option_parser.add_option(
  '--aggregationMethod',
  help="The consolidation function to fetch from on input and " +
       "aggregationMethod to set on output. One of: %s" %
       ', '.join(aggregationMethods),
  default='average',
  type='string')
option_parser.add_option(
  '--destinationPath',
  help="Path to place created whisper file. Defaults to the " +
       "RRD file's source path.",
  default=None,
  type='string')

(options, args) = option_parser.parse_args()

if len(args) < 1:
  option_parser.print_help()
  sys.exit(1)

rrd_path = args[0]

try:
  rrd_info = rrdtool.info(rrd_path)
except rrdtool.error as exc:
  raise SystemExit('[ERROR] %s' % str(exc))

seconds_per_pdp = rrd_info['step']

# Reconcile old vs new python-rrdtool APIs (yuck)
# leave consistent 'rras' and 'datasources' lists
if 'rra' in rrd_info:
  rras = rrd_info['rra']
else:
  rra_indices = []
  for key in rrd_info:
    if key.startswith('rra['):
      index = int(key.split('[')[1].split(']')[0])
      rra_indices.append(index)

  rra_count = max(rra_indices) + 1
  rras = []
  for i in range(rra_count):
    rra_info = {}
    rra_info['pdp_per_row'] = rrd_info['rra[%d].pdp_per_row' % i]
    rra_info['rows'] = rrd_info['rra[%d].rows' % i]
    rra_info['cf'] = rrd_info['rra[%d].cf' % i]
    rra_info['xff'] = rrd_info['rra[%d].xff' % i]
    rras.append(rra_info)

if 'ds' in rrd_info:
  datasources = rrd_info['ds'].keys()
else:
  ds_keys = [key for key in rrd_info if key.startswith('ds[')]
  datasources = list(set(key[3:].split(']')[0] for key in ds_keys))

# Grab the archive configuration
relevant_rras = []
for rra in rras:
  if rra['cf'] == options.aggregationMethod.upper():
    relevant_rras.append(rra)

if not relevant_rras:
  err = "[ERROR] Unable to find any RRAs with consolidation function: %s" % \
        options.aggregationMethod.upper()
  raise SystemExit(err)

archives = []
xFilesFactor = options.xFilesFactor
for rra in relevant_rras:
  precision = rra['pdp_per_row'] * seconds_per_pdp
  points = rra['rows']
  if not xFilesFactor:
    xFilesFactor = rra['xff']
  archives.append((precision, points))

for datasource in datasources:
  now = int(time.time())
  suffix = '_%s' % datasource if len(datasources) > 1 else ''

  if options.destinationPath:
    destination_path = options.destinationPath
    if not os.path.isdir(destination_path):
      try:
        os.makedirs(destination_path)
      except OSError as exc:  # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(destination_path):
          pass
        else:
          raise
    rrd_file = os.path.basename(rrd_path).replace('.rrd', '%s.wsp' % suffix)
    path = destination_path + '/' + rrd_file
  else:
    path = rrd_path.replace('.rrd', '%s.wsp' % suffix)

  try:
    whisper.create(path, archives, xFilesFactor=xFilesFactor)
  except whisper.InvalidConfiguration as e:
    raise SystemExit('[ERROR] %s' % str(e))

  size = os.stat(path).st_size
  archiveConfig = ','.join(["%d:%d" % ar for ar in archives])
  print("Created: %s (%d bytes) with archives: %s" %
        (path, size, archiveConfig))

  print("Migrating data")
  archiveNumber = len(archives) - 1
  for precision, points in reversed(archives):
    retention = precision * points
    endTime = now - now % precision
    startTime = endTime - retention
    (time_info, columns, rows) = rrdtool.fetch(
      rrd_path,
      options.aggregationMethod.upper(),
      '-r', str(precision),
      '-s', str(startTime),
      '-e', str(endTime))
    column_index = list(columns).index(datasource)
    rows.pop()  # remove the last datapoint because RRD sometimes gives funky values
    values = [row[column_index] for row in rows]
    timestamps = list(range(*time_info))
    datapoints = zip(timestamps, values)
    datapoints = [datapoint for datapoint in datapoints
                  if datapoint[1] is not None]
    print(' migrating %d datapoints from archive %d' %
          (len(datapoints), archiveNumber))
    archiveNumber -= 1
    whisper.update_many(path, datapoints)
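The script above boils each matching RRA down to a `(secondsPerPoint, points)` tuple before calling `whisper.create`. A hand-rolled equivalent of that step, with made-up RRD step and RRA definitions purely for illustration:

```
import whisper

seconds_per_pdp = 300  # hypothetical RRD step, in seconds
rras = [{'pdp_per_row': 1, 'rows': 4032},    # hypothetical RRA definitions
        {'pdp_per_row': 12, 'rows': 2400}]

# precision = PDPs per row * seconds per PDP, just as in the script
archives = [(rra['pdp_per_row'] * seconds_per_pdp, rra['rows']) for rra in rras]
whisper.create('converted.wsp', archives, xFilesFactor=0.5)
```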
whisper-1.1.10/bin/whisper-create.py

#!/usr/bin/env python

import os
import sys
import signal
import optparse
import math

try:
  import whisper
except ImportError:
  raise SystemExit('[ERROR] Please make sure whisper is installed properly')


def byte_format(num):
  for x in ['bytes', 'KB', 'MB']:
    if num < 1024.0:
      return "%.3f%s" % (num, x)
    num /= 1024.0
  return "%.3f%s" % (num, 'GB')


# Ignore SIGPIPE
try:
  signal.signal(signal.SIGPIPE, signal.SIG_DFL)
except AttributeError:
  # OS=windows
  pass

option_parser = optparse.OptionParser(
  usage='''%prog path timePerPoint:timeToStore [timePerPoint:timeToStore]*

%prog --estimate timePerPoint:timeToStore [timePerPoint:timeToStore]*

timePerPoint and timeToStore specify lengths of time, for example:

60:1440      60 seconds per datapoint, 1440 datapoints = 1 day of retention
15m:8        15 minutes per datapoint, 8 datapoints = 2 hours of retention
1h:7d        1 hour per datapoint, 7 days of retention
12h:2y       12 hours per datapoint, 2 years of retention
''')
option_parser.add_option('--xFilesFactor', default=0.5, type='float')
option_parser.add_option('--aggregationMethod', default='average',
                         type='string',
                         help="Function to use when aggregating values (%s)" %
                         ', '.join(whisper.aggregationMethods))
option_parser.add_option('--overwrite', default=False, action='store_true')
option_parser.add_option('--estimate', default=False, action='store_true',
                         help="Don't create a whisper file, estimate storage "
                              "requirements based on archive definitions")
option_parser.add_option('--sparse', default=False, action='store_true',
                         help="Create new whisper as sparse file")
option_parser.add_option('--fallocate', default=False, action='store_true',
                         help="Create new whisper and use fallocate")

(options, args) = option_parser.parse_args()

if options.estimate:
  if len(args) == 0:
    option_parser.print_usage()
    sys.exit(1)
  if len(args) == 1 and args[0].find(",") > 0:
    args = args[0].split(",")

  archives = 0
  total_points = 0
  for (precision, points) in map(whisper.parseRetentionDef, args):
    print("Archive %s: %s points of %ss precision" %
          (archives, points, precision))
    archives += 1
    total_points += points

  size = 16 + (archives * 12) + (total_points * 12)
  disk_size = int(math.ceil(size / 4096.0) * 4096)
  print("\nEstimated Whisper DB Size: %s (%s bytes on disk with 4k blocks)\n" %
        (byte_format(size), disk_size))

  for x in [1, 5, 10, 50, 100, 500]:
    print("Estimated storage requirement for %sk metrics: %s" %
          (x, byte_format(x * 1000 * disk_size)))
  sys.exit(0)

if len(args) < 2:
  option_parser.print_help()
  sys.exit(1)

path = args[0]
archives = [whisper.parseRetentionDef(retentionDef)
            for retentionDef in args[1:]]

if os.path.exists(path) and options.overwrite:
  print('Overwriting existing file: %s' % path)
  os.unlink(path)

try:
  whisper.create(path, archives, xFilesFactor=options.xFilesFactor,
                 aggregationMethod=options.aggregationMethod,
                 sparse=options.sparse, useFallocate=options.fallocate)
except whisper.WhisperException as exc:
  raise SystemExit('[ERROR] %s' % str(exc))

size = os.stat(path).st_size
print('Created: %s (%d bytes)' % (path, size))
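The `--estimate` branch above mirrors the on-disk layout: a 16-byte file header, 12 bytes of metadata per archive, and 12 bytes per stored point, rounded up to 4k blocks. A worked check of that arithmetic for the `60:1440` example from the usage text:

```
import math

archives = [(60, 1440)]  # 60 seconds per point, one day of retention
size = 16 + len(archives) * 12 + sum(points for _, points in archives) * 12
disk_size = int(math.ceil(size / 4096.0) * 4096)
print(size, disk_size)  # 17308 bytes -> 20480 bytes on disk with 4k blocks
```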
whisper-1.1.10/bin/whisper-diff.py

#!/usr/bin/python -tt

import sys
import optparse
import json

try:
  import whisper
except ImportError:
  raise SystemExit('[ERROR] Please make sure whisper is installed properly')

option_parser = optparse.OptionParser(usage='''%prog [options] path_a path_b''')
option_parser.add_option('--summary', default=False, action='store_true',
                         help="show summary of differences")
option_parser.add_option('--ignore-empty', default=False, action='store_true',
                         help="skip comparison if either value is undefined")
option_parser.add_option('--columns', default=False, action='store_true',
                         help="print output in simple columns")
option_parser.add_option('--no-headers', default=False, action='store_true',
                         help="do not print column headers")
option_parser.add_option('--until', default=None, type='int',
                         help="Unix epoch time of the end of your requested "
                              "interval (default: None)")
option_parser.add_option('--json', default=False, action='store_true',
                         help="Output results in JSON form")

(options, args) = option_parser.parse_args()

if len(args) != 2:
  option_parser.print_help()
  sys.exit(1)

(path_a, path_b) = args[0::1]

if options.until:
  until_time = int(options.until)
else:
  until_time = None


def print_diffs(diffs, pretty=True, headers=True):
  if pretty:
    h = "%7s %11s %13s %13s\n"
    f = "%7s %11d %13s %13s\n"
  else:
    h = "%s %s %s %s\n"
    f = "%s %d %s %s\n"
  if headers:
    sys.stdout.write(h % ('archive', 'timestamp', 'value_a', 'value_b'))
  for archive, points, total in diffs:
    if pretty:
      sys.stdout.write('Archive %d (%d of %d datapoints differ)\n' %
                       (archive, points.__len__(), total))
      sys.stdout.write(h % ('', 'timestamp', 'value_a', 'value_b'))
    for p in points:
      if pretty:
        sys.stdout.write(f % ('', p[0], p[1], p[2]))
      else:
        sys.stdout.write(f % (archive, p[0], p[1], p[2]))


def print_summary(diffs, pretty=True, headers=True):
  if pretty:
    f = "%7s %9s %9s\n"
  else:
    f = "%s %s %s\n"
  if headers:
    sys.stdout.write(f % ('archive', 'total', 'differing'))
  for archive, points, total in diffs:
    sys.stdout.write(f % (archive, total, points.__len__()))


def print_summary_json(diffs, path_a, path_b):
  print(json.dumps({'path_a': path_a,
                    'path_b': path_b,
                    'archives': [{'archive': archive,
                                  'total': total,
                                  'points': points.__len__()}
                                 for archive, points, total in diffs]},
                   sort_keys=True, indent=2, separators=(',', ' : ')))


def print_diffs_json(diffs, path_a, path_b):
  print(json.dumps({'path_a': path_a,
                    'path_b': path_b,
                    'archives': [{'archive': archive,
                                  'total': total,
                                  'points': points.__len__(),
                                  'datapoint': [{'timestamp': p[0],
                                                 'value_a': p[1],
                                                 'value_b': p[2]}
                                                for p in points]}
                                 for archive, points, total in diffs]},
                   sort_keys=True, indent=2, separators=(',', ' : ')))


def main():
  archive_diffs = whisper.diff(path_a, path_b,
                               ignore_empty=options.ignore_empty,
                               until_time=until_time)
  if options.summary:
    if options.json:
      print_summary_json(archive_diffs, path_a, path_b)
    else:
      print_summary(archive_diffs, pretty=(not options.columns),
                    headers=(not options.no_headers))
  else:
    if options.json:
      print_diffs_json(archive_diffs, path_a, path_b)
    else:
      print_diffs(archive_diffs, pretty=(not options.columns),
                  headers=(not options.no_headers))


if __name__ == "__main__":
  main()
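Programmatically, `whisper.diff` returns one `(archive_index, differing_points, total_compared)` tuple per archive, with each differing point a `(timestamp, value_a, value_b)` triple — the same data the printer functions above format. A minimal sketch (file paths are placeholders):

```
import whisper

for archive, points, total in whisper.diff('a.wsp', 'b.wsp', ignore_empty=True):
    print('archive %d: %d of %d datapoints differ' % (archive, len(points), total))
    for timestamp, value_a, value_b in points:
        print('  %d: %s != %s' % (timestamp, value_a, value_b))
```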
whisper-1.1.10/bin/whisper-dump.py

#!/usr/bin/env python

import os
import mmap
import time
import struct
import signal
import sys
import optparse

try:
  import whisper
except ImportError:
  raise SystemExit('[ERROR] Please make sure whisper is installed properly')

if sys.version_info >= (3, 0):
  xrange = range

# Ignore SIGPIPE
signal.signal(signal.SIGPIPE, signal.SIG_DFL)

option_parser = optparse.OptionParser(usage='''%prog path''')
option_parser.add_option(
  '--pretty', default=False, action='store_true',
  help="Show human-readable timestamps instead of unix times")
option_parser.add_option(
  '-t', '--time-format', action='store', type='string', dest='time_format',
  help='Time format to use with --pretty; see time.strftime()')
option_parser.add_option(
  '-r', '--raw', default=False, action='store_true',
  help='Dump value only in the same format for whisper-update (UTC timestamps)')

(options, args) = option_parser.parse_args()

if len(args) != 1:
  option_parser.error("require one input file name")
else:
  path = args[0]


def mmap_file(filename):
  fd = os.open(filename, os.O_RDONLY)
  map = mmap.mmap(fd, os.fstat(fd).st_size, prot=mmap.PROT_READ)
  os.close(fd)
  return map


def read_header(map):
  try:
    (aggregationType, maxRetention, xFilesFactor, archiveCount) \
        = struct.unpack(whisper.metadataFormat, map[:whisper.metadataSize])
  except (struct.error, ValueError, TypeError):
    raise whisper.CorruptWhisperFile("Unable to unpack header")

  archives = []
  archiveOffset = whisper.metadataSize

  for i in xrange(archiveCount):
    try:
      (offset, secondsPerPoint, points) = struct.unpack(
        whisper.archiveInfoFormat,
        map[archiveOffset:archiveOffset + whisper.archiveInfoSize]
      )
    except (struct.error, ValueError, TypeError):
      raise whisper.CorruptWhisperFile("Unable to read archive %d metadata" % i)

    archiveInfo = {
      'offset': offset,
      'secondsPerPoint': secondsPerPoint,
      'points': points,
      'retention': secondsPerPoint * points,
      'size': points * whisper.pointSize,
    }
    archives.append(archiveInfo)
    archiveOffset += whisper.archiveInfoSize

  header = {
    'aggregationMethod': whisper.aggregationTypeToMethod.get(aggregationType, 'average'),
    'maxRetention': maxRetention,
    'xFilesFactor': xFilesFactor,
    'archives': archives,
  }

  return header


def dump_header(header):
  print('Meta data:')
  print('  aggregation method: %s' % header['aggregationMethod'])
  print('  max retention: %d' % header['maxRetention'])
  print('  xFilesFactor: %g' % header['xFilesFactor'])
  print("")
  dump_archive_headers(header['archives'])


def dump_archive_headers(archives):
  for i, archive in enumerate(archives):
    print('Archive %d info:' % i)
    print('  offset: %d' % archive['offset'])
    print('  seconds per point: %d' % archive['secondsPerPoint'])
    print('  points: %d' % archive['points'])
    print('  retention: %d' % archive['retention'])
    print('  size: %d' % archive['size'])
    print("")


def dump_archives(archives, options):
  for i, archive in enumerate(archives):
    if not options.raw:
      print('Archive %d data:' % i)
    offset = archive['offset']
    for point in xrange(archive['points']):
      (timestamp, value) = struct.unpack(
        whisper.pointFormat,
        map[offset:offset + whisper.pointSize]
      )
      if options.pretty:
        if options.time_format:
          timestr = time.localtime(timestamp)
          timestr = time.strftime(options.time_format, timestr)
        else:
          timestr = time.ctime(timestamp)
      else:
        timestr = str(timestamp)
      if options.raw:
        print('%s:%.35g' % (timestamp, value))
      else:
        print('%d: %s, %10.35g' % (point, timestr, value))
      offset += whisper.pointSize
    print('')
if not os.path.exists(path):
  raise SystemExit('[ERROR] File "%s" does not exist!' % path)

map = mmap_file(path)
header = read_header(map)
if not options.raw:
  dump_header(header)
dump_archives(header['archives'], options)

whisper-1.1.10/bin/whisper-fetch.py

#!/usr/bin/env python

import sys
import time
import signal
import optparse

try:
  import whisper
except ImportError:
  raise SystemExit('[ERROR] Please make sure whisper is installed properly')

_DROP_FUNCTIONS = {
  'zeroes': lambda x: x != 0,
  'nulls': lambda x: x is not None,
  'empty': lambda x: x != 0 and x is not None
}

# Ignore SIGPIPE
signal.signal(signal.SIGPIPE, signal.SIG_DFL)

now = int(time.time())
yesterday = now - (60 * 60 * 24)

option_parser = optparse.OptionParser(usage='''%prog [options] path''')
option_parser.add_option(
  '--from', default=yesterday, type='int', dest='_from',
  help=("Unix epoch time of the beginning of "
        "your requested interval (default: 24 hours ago)"))
option_parser.add_option(
  '--until', default=now, type='int',
  help="Unix epoch time of the end of your requested interval (default: now)")
option_parser.add_option(
  '--json', default=False, action='store_true',
  help="Output results in JSON form")
option_parser.add_option(
  '--pretty', default=False, action='store_true',
  help="Show human-readable timestamps instead of unix times")
option_parser.add_option(
  '-t', '--time-format', action='store', type='string', dest='time_format',
  help='Time format to use with --pretty; see time.strftime()')
option_parser.add_option(
  '--drop', choices=list(_DROP_FUNCTIONS.keys()), action='store',
  help="Specify 'nulls' to drop all null values. "
       "Specify 'zeroes' to drop all zero values. "
       "Specify 'empty' to drop both null and zero values")

(options, args) = option_parser.parse_args()

if len(args) != 1:
  option_parser.print_help()
  sys.exit(1)

path = args[0]

from_time = int(options._from)
until_time = int(options.until)

try:
  data = whisper.fetch(path, from_time, until_time)
  if not data:
    raise SystemExit('No data in selected timerange')
  (timeInfo, values) = data
except (whisper.WhisperException, IOError) as exc:
  raise SystemExit('[ERROR] %s' % str(exc))

if options.drop:
  fcn = _DROP_FUNCTIONS.get(options.drop)
  values = [x for x in values if fcn(x)]

(start, end, step) = timeInfo

if options.json:
  values_json = str(values).replace('None', 'null')
  print('''{
  "start" : %d,
  "end" : %d,
  "step" : %d,
  "values" : %s
}''' % (start, end, step, values_json))
  sys.exit(0)

t = start
for value in values:
  if options.pretty:
    if options.time_format:
      timestr = time.strftime(options.time_format, time.localtime(t))
    else:
      timestr = time.ctime(t)
  else:
    timestr = str(t)
  if value is None:
    valuestr = "None"
  else:
    valuestr = "%f" % value
  print("%s\t%s" % (timestr, valuestr))
  t += step
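The `--drop` choices above are plain predicates applied to the fetched values. Note that the script filters `values` before the timestamped loop, while `t` still advances one `step` per remaining value, so dropped points shift the printed timestamps. Pairing timestamps with values before filtering keeps them aligned; a sketch (path is a placeholder):

```
import time
import whisper

(start, end, step), values = whisper.fetch('example.wsp', int(time.time()) - 86400)

# equivalent of --drop=empty, but pairing timestamps first so they stay aligned
points = [(t, v) for t, v in zip(range(start, end, step), values)
          if v is not None and v != 0]
```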
whisper-1.1.10/bin/whisper-fill.py

#!/usr/bin/env python

# whisper-fill: unlike whisper-merge, don't overwrite data that's
# already present in the target file, but instead, only add the missing
# data (e.g. where the gaps in the target file are). Because no values
# are overwritten, no data or precision gets lost. Also, unlike
# whisper-merge, try to take the highest-precision archive to provide
# the data, instead of the one with the largest retention.
#
# Using this script, reconciliation between two replica instances can be
# performed by whisper-fill-ing the data of the other replica with the
# data that exists locally, without introducing the quite remarkable
# gaps that whisper-merge leaves behind (filling a higher precision
# archive with data from a lower precision one)

# Work performed by Fabian Groffen @grobian while working at Booking.com.
# additional patches are from https://github.com/jssjr/carbonate/

import whisper

try:
  from whisper import operator
  HAS_OPERATOR = True
except ImportError:
  HAS_OPERATOR = False

import time
import sys
import optparse

if sys.version_info >= (3, 0):
  xrange = range
else:
  from future_builtins import filter, zip


def itemgetter(*items):
  if HAS_OPERATOR:
    return operator.itemgetter(*items)
  else:
    if len(items) == 1:
      item = items[0]

      def g(obj):
        return obj[item]
    else:
      def g(obj):
        return tuple(obj[item] for item in items)
    return g


def fill(src, dst, tstart, tstop):
  # fetch range start-stop from src, taking values from the highest
  # precision archive, thus optionally requiring multiple fetch + merges
  srcHeader = whisper.info(src)
  srcArchives = srcHeader['archives']
  srcArchives.sort(key=itemgetter('retention'))

  # find oldest point in time, stored by both files
  srcTime = int(time.time()) - srcHeader['maxRetention']

  if tstart < srcTime and tstop < srcTime:
    return

  # we want to retain as much precision as we can, hence we do backwards
  # walk in time

  # skip forward at max 'step' points at a time
  for archive in srcArchives:
    # skip over archives that don't have any data points
    rtime = time.time() - archive['retention']
    if tstop <= rtime:
      continue

    untilTime = tstop
    fromTime = rtime if rtime > tstart else tstart

    (timeInfo, values) = whisper.fetch(src, fromTime, untilTime)
    (start, end, archive_step) = timeInfo
    pointsToWrite = list(filter(
      lambda points: points[1] is not None,
      zip(xrange(start, end, archive_step), values)))
    # order points by timestamp, newest first
    pointsToWrite.sort(key=lambda p: p[0], reverse=True)
    whisper.update_many(dst, pointsToWrite)

    tstop = fromTime

    # can stop when there's nothing to fetch any more
    if tstart == tstop:
      return


def fill_archives(src, dst, startFrom):
  header = whisper.info(dst)
  archives = header['archives']
  archives = sorted(archives, key=lambda t: t['retention'])

  for archive in archives:
    fromTime = time.time() - archive['retention']
    if fromTime >= startFrom:
      continue

    (timeInfo, values) = whisper.fetch(dst, fromTime, startFrom)
    (start, end, step) = timeInfo
    gapstart = None
    for v in values:
      if not v and not gapstart:
        gapstart = start
      elif v and gapstart:
        # ignore single units lost
        if (start - gapstart) > archive['secondsPerPoint']:
          fill(src, dst, gapstart - step, start)
        gapstart = None
      elif gapstart and start == end - step:
        fill(src, dst, gapstart - step, start)
      start += step

    startFrom = fromTime


def main():
  option_parser = optparse.OptionParser(
    usage='%prog [--lock] src dst',
    description='copies data from src in dst, if missing')
  option_parser.add_option(
    '--lock', help='Lock whisper files',
    default=False, action='store_true')
  (options, args) = option_parser.parse_args()

  if len(args) != 2:
    option_parser.print_help()
    sys.exit(1)

  if options.lock is True and whisper.CAN_LOCK:
    whisper.LOCK = True

  src = args[0]
  dst = args[1]
  startFrom = time.time()

  fill_archives(src, dst, startFrom)


if __name__ == "__main__":
  main()
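Note that `fill` and `fill_archives` live in this script rather than in the whisper module itself, so reconciling two replicas from Python means copying or importing these functions. A sketch, with placeholder paths and assuming `fill_archives` is in scope:

```
import time
import whisper

if whisper.CAN_LOCK:  # mirror the --lock option
    whisper.LOCK = True

# assumes fill_archives() from this script has been copied or imported;
# it fills every gap in replica-b.wsp with data from replica-a.wsp,
# walking backwards from now
fill_archives('replica-a.wsp', 'replica-b.wsp', time.time())
```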
whisper-1.1.10/bin/whisper-info.py

#!/usr/bin/env python

import os
import sys
import signal
import optparse
import json

try:
  import whisper
except ImportError:
  raise SystemExit('[ERROR] Please make sure whisper is installed properly')

# Ignore SIGPIPE
try:
  signal.signal(signal.SIGPIPE, signal.SIG_DFL)
except AttributeError:
  # OS=windows
  pass

option_parser = optparse.OptionParser(usage='''%prog [options] path [field]''')
option_parser.add_option('--json', default=False, action='store_true',
                         help="Output results in JSON form")

(options, args) = option_parser.parse_args()

if len(args) < 1:
  option_parser.print_help()
  sys.exit(1)

path = args[0]
if len(args) > 1:
  field = args[1]
else:
  field = None

try:
  info = whisper.info(path)
except whisper.WhisperException as exc:
  raise SystemExit('[ERROR] %s' % str(exc))

info['fileSize'] = os.stat(path).st_size

if field:
  if field not in info:
    print('Unknown field "%s". Valid fields are %s' % (field, ','.join(info)))
    sys.exit(1)

  print(info[field])
  sys.exit(0)

if options.json:
  print(json.dumps(info, indent=2, separators=(',', ': ')))
else:
  archives = info.pop('archives')
  for key, value in info.items():
    print('%s: %s' % (key, value))
  print('')

  for i, archive in enumerate(archives):
    print('Archive %d' % i)
    for key, value in archive.items():
      print('%s: %s' % (key, value))
    print('')
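The dictionary returned by `whisper.info` has a stable shape — top-level metadata plus one dict per archive — so field access from Python is direct (the path is a placeholder):

```
import whisper

info = whisper.info('example.wsp')
print(info['aggregationMethod'], info['xFilesFactor'], info['maxRetention'])
for archive in info['archives']:
    print(archive['secondsPerPoint'], archive['points'], archive['retention'])
```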
whisper-1.1.10/bin/whisper-merge.py

#!/usr/bin/env python

import os
import sys
import signal
import optparse

try:
  import whisper
except ImportError:
  raise SystemExit('[ERROR] Please make sure whisper is installed properly')

# Ignore SIGPIPE
signal.signal(signal.SIGPIPE, signal.SIG_DFL)

option_parser = optparse.OptionParser(
  usage='''%prog [options] from_path to_path''')
option_parser.add_option(
  '--from', default=None, type='int', dest='_from',
  help=("Beginning of interval, unix timestamp (default: epoch)"))
option_parser.add_option(
  '--until', default=None, type='int',
  help="End of interval, unix timestamp (default: now)")

(options, args) = option_parser.parse_args()

if len(args) < 2:
  option_parser.print_help()
  sys.exit(1)

path_from = args[0]
path_to = args[1]

for filename in (path_from, path_to):
  if not os.path.exists(filename):
    raise SystemExit('[ERROR] File "%s" does not exist!' % filename)

whisper.merge(path_from, path_to, options._from, options.until)

whisper-1.1.10/bin/whisper-resize.py

#!/usr/bin/env python

import os
import sys
import time
import bisect
import signal
import optparse
import traceback

try:
  import whisper
except ImportError:
  raise SystemExit('[ERROR] Please make sure whisper is installed properly')

# Ignore SIGPIPE
signal.signal(signal.SIGPIPE, signal.SIG_DFL)

now = int(time.time())

option_parser = optparse.OptionParser(
  usage='''%prog path timePerPoint:timeToStore [timePerPoint:timeToStore]*

timePerPoint and timeToStore specify lengths of time, for example:

60:1440      60 seconds per datapoint, 1440 datapoints = 1 day of retention
15m:8        15 minutes per datapoint, 8 datapoints = 2 hours of retention
1h:7d        1 hour per datapoint, 7 days of retention
12h:2y       12 hours per datapoint, 2 years of retention
''')

option_parser.add_option(
  '--xFilesFactor', default=None, type='float',
  help="Change the xFilesFactor")
option_parser.add_option(
  '--aggregationMethod', default=None, type='string',
  help="Change the aggregation function (%s)" %
       ', '.join(whisper.aggregationMethods))
option_parser.add_option(
  '--force', default=False, action='store_true',
  help="Perform a destructive change")
option_parser.add_option(
  '--newfile', default=None, action='store',
  help="Create a new database file without removing the existing one")
option_parser.add_option(
  '--nobackup', action='store_true',
  help='Delete the .bak file after successful execution')
option_parser.add_option(
  '--aggregate', action='store_true',
  help='Try to aggregate the values to fit the new archive better.'
       ' Note that this will make things slower and use more memory.')
(options, args) = option_parser.parse_args()

if len(args) < 2:
  option_parser.print_help()
  sys.exit(1)

path = args[0]

if not os.path.exists(path):
  sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % path)
  option_parser.print_help()
  sys.exit(1)

info = whisper.info(path)

new_archives = [whisper.parseRetentionDef(retentionDef)
                for retentionDef in args[1:]]

old_archives = info['archives']
# sort by precision, lowest to highest
old_archives.sort(key=lambda a: a['secondsPerPoint'], reverse=True)

if options.xFilesFactor is None:
  xff = info['xFilesFactor']
else:
  xff = options.xFilesFactor

if options.aggregationMethod is None:
  aggregationMethod = info['aggregationMethod']
else:
  aggregationMethod = options.aggregationMethod

print('Retrieving all data from the archives')
for archive in old_archives:
  fromTime = now - archive['retention'] + archive['secondsPerPoint']
  untilTime = now
  timeinfo, values = whisper.fetch(path, fromTime, untilTime)
  archive['data'] = (timeinfo, values)

if options.newfile is None:
  tmpfile = path + '.tmp'
  if os.path.exists(tmpfile):
    print('Removing previous temporary database file: %s' % tmpfile)
    os.unlink(tmpfile)
  newfile = tmpfile
else:
  newfile = options.newfile

print('Creating new whisper database: %s' % newfile)
whisper.create(newfile, new_archives, xFilesFactor=xff,
               aggregationMethod=aggregationMethod)
size = os.stat(newfile).st_size
print('Created: %s (%d bytes)' % (newfile, size))

if options.aggregate:
  # This is where data will be interpolated (best effort)
  print('Migrating data with aggregation...')
  all_datapoints = []
  for archive in sorted(old_archives, key=lambda x: x['secondsPerPoint']):
    # Loading all datapoints into memory for fast querying
    timeinfo, values = archive['data']
    new_datapoints = list(zip(range(*timeinfo), values))
    new_datapoints.reverse()
    if all_datapoints:
      last_timestamp = all_datapoints[-1][0]
      slice_end = 0
      for i, (timestamp, value) in enumerate(new_datapoints):
        if timestamp < last_timestamp:
          slice_end = i
          break
      all_datapoints += new_datapoints[slice_end:]
    else:
      all_datapoints += new_datapoints

  all_datapoints.reverse()
  oldtimestamps = list(map(lambda p: p[0], all_datapoints))
  oldvalues = list(map(lambda p: p[1], all_datapoints))

  print("oldtimestamps: %s" % oldtimestamps)
  # Simply cleaning up some used memory
  del all_datapoints

  new_info = whisper.info(newfile)
  new_archives = new_info['archives']

  for archive in new_archives:
    step = archive['secondsPerPoint']
    fromTime = now - archive['retention'] + now % step
    untilTime = now + now % step + step
    print("(%s,%s,%s)" % (fromTime, untilTime, step))
    timepoints_to_update = range(fromTime, untilTime, step)
    print("timepoints_to_update: %s" % timepoints_to_update)
    newdatapoints = []
    for tinterval in zip(timepoints_to_update[:-1], timepoints_to_update[1:]):
      # TODO: Setting lo= parameter for 'lefti' based on righti from previous
      #       iteration. Obviously, this can only be done if
      #       timepoints_to_update is always updated. Is it?
            lefti = bisect.bisect_left(oldtimestamps, tinterval[0])
            righti = bisect.bisect_left(oldtimestamps, tinterval[1], lo=lefti)
            newvalues = oldvalues[lefti:righti]
            if newvalues:
                non_none = list(filter(lambda x: x is not None, newvalues))
                if non_none and 1.0 * len(non_none) / len(newvalues) >= xff:
                    newdatapoints.append(
                        [tinterval[0],
                         whisper.aggregate(aggregationMethod,
                                           non_none, newvalues)])
        whisper.update_many(newfile, newdatapoints)
else:
    print('Migrating data without aggregation...')
    for archive in old_archives:
        timeinfo, values = archive['data']
        datapoints = zip(range(*timeinfo), values)
        datapoints = filter(lambda p: p[1] is not None, datapoints)
        whisper.update_many(newfile, datapoints)

if options.newfile is not None:
    sys.exit(0)

backup = path + '.bak'
print('Renaming old database to: %s' % backup)
os.rename(path, backup)

try:
    print('Renaming new database to: %s' % path)
    os.rename(tmpfile, path)
except OSError:
    traceback.print_exc()
    print('\nOperation failed, restoring backup')
    os.rename(backup, path)
    sys.exit(1)

if options.nobackup:
    print("Unlinking backup: %s" % backup)
    os.unlink(backup)

==> whisper-1.1.10/bin/whisper-set-aggregation-method.py <==
#!/usr/bin/env python

import sys
import signal
import optparse

try:
    import whisper
except ImportError:
    raise SystemExit('[ERROR] Please make sure whisper is installed properly')

# Ignore SIGPIPE
try:
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
except AttributeError:
    # windows?
    pass

option_parser = optparse.OptionParser(
    usage='%%prog path <%s> [xFilesFactor]' %
    '|'.join(whisper.aggregationMethods))

(options, args) = option_parser.parse_args()

if len(args) < 2:
    option_parser.print_help()
    sys.exit(1)

path = args[0]
aggregationMethod = args[1]

xFilesFactor = None
if len(args) == 3:
    xFilesFactor = args[2]

try:
    oldAggregationMethod = whisper.setAggregationMethod(path, aggregationMethod,
                                                        xFilesFactor)
except IOError:
    sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % path)
    option_parser.print_help()
    sys.exit(1)
except whisper.WhisperException as exc:
    raise SystemExit('[ERROR] %s' % str(exc))

print('Updated aggregation method: %s (%s -> %s)' %
      (path, oldAggregationMethod, aggregationMethod))

==> whisper-1.1.10/bin/whisper-set-xfilesfactor.py <==
#!/usr/bin/env python

import sys
import argparse
import whisper


def main():
    """Set xFilesFactor for existing whisper file"""
    parser = argparse.ArgumentParser(
        description='Set xFilesFactor for existing whisper file')
    parser.add_argument('path', type=str, help='path to whisper file')
    parser.add_argument('xff', metavar='xFilesFactor', type=float,
                        help='new xFilesFactor, a float between 0 and 1')

    args = parser.parse_args()

    try:
        old_xff = whisper.setXFilesFactor(args.path, args.xff)
    except IOError:
        sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % args.path)
        parser.print_help()
        sys.exit(1)
    except whisper.WhisperException as exc:
        raise SystemExit('[ERROR] %s' % str(exc))

    print('Updated xFilesFactor: %s (%s -> %s)' % (args.path, old_xff, args.xff))


if __name__ == "__main__":
    main()
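Both scripts above are thin wrappers around the whisper module API. A minimal
sketch of driving the same calls programmatically (the file path
'example.wsp' is a hypothetical illustration; setAggregationMethod and
setXFilesFactor are the same functions the scripts call):

```python
import whisper

# one archive: 60 seconds per point, 1440 points (one day of retention)
whisper.create('example.wsp', [(60, 1440)])

# returns the previous aggregation method, just as the CLI prints it
old_method = whisper.setAggregationMethod('example.wsp', 'max')

# returns the previous xFilesFactor; the value must be a float in [0, 1]
old_xff = whisper.setXFilesFactor('example.wsp', 0.25)

print(old_method, old_xff)
```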
==> whisper-1.1.10/bin/whisper-update.py <==
#!/usr/bin/env python

import sys
import time
import signal
import optparse

try:
    import whisper
except ImportError:
    raise SystemExit('[ERROR] Please make sure whisper is installed properly')

# Ignore SIGPIPE
signal.signal(signal.SIGPIPE, signal.SIG_DFL)

now = int(time.time())

option_parser = optparse.OptionParser(
    usage='''%prog [options] path [timestamp:value]*

If no values are passed as arguments, they are read one-per-line from stdin.''')

(options, args) = option_parser.parse_args()

if not args:
    option_parser.print_help()
    sys.exit(1)

path = args[0]

if len(args) >= 2:
    datapoint_strings = args[1:]
else:
    # no argv values, so read from stdin
    datapoint_strings = sys.stdin

datapoint_strings = [point.replace('N:', '%d:' % now)
                     for point in datapoint_strings]
datapoints = [tuple(point.split(':')) for point in datapoint_strings]

try:
    if len(datapoints) == 1:
        timestamp, value = datapoints[0]
        whisper.update(path, value, timestamp)
    else:
        whisper.update_many(path, datapoints)
except whisper.WhisperException as exc:
    raise SystemExit('[ERROR] %s' % str(exc))

==> whisper-1.1.10/contrib/update-storage-times.py <==
#!/usr/bin/env python
# @package update_storage_schemas.py
# Correct/Update storage schemas
# @code
# # Usage example for update_storage_schemas.py
# sudo ./update_storage_schemas.py --path /opt/graphite/whisper \
#     --cfg /opt/graphite/conf/schemas
# @endcode

import sys
import os
import logging
import subprocess
import argparse
import re
import time
from multiprocessing import Pool, cpu_count
from configobj import ConfigObj

# Assuming Python 2, we'll want scandir if possible, it's much faster
try:
    from scandir import scandir
except ImportError:
    from os import listdir as scandir

LOG = logging.getLogger()
LOG.setLevel(logging.INFO)
SCHEMA_LIST = {}
# The very basic default retentions
DEFAULT_SCHEMA = {'match': re.compile('.*'),
                  'retentions': '1m:7d'}
DEBUG = False
DRY_RUN = False
ROOT_PATH = ""


def config_schemas(cfg):
    schema_conf = ConfigObj(cfg)

    for schema in schema_conf.items():
        item = schema[1]['pattern']
        if item == '.*':
            DEFAULT_SCHEMA['retentions'] = schema[1]['retentions']
        else:
            if item[0] == '^':
                item = item[1:]
            SCHEMA_LIST[item] = {'retentions': schema[1]['retentions'],
                                 'match': re.compile(item)}


def _convert_seconds(time):
    seconds_dict = {'s': 1, 'm': 60, 'h': 3600, 'min': 60,
                    'd': 86400, 'w': 604800, 'y': 31536000}
    (points, time) = time.split(':')
    if str.isalpha(time[-1]):
        time = int(time[:-1]) * seconds_dict[time[-1]]
    else:
        # A bare retention value is a point count (e.g. '60:1440'), so
        # multiply by the precision to get total seconds; the comparison
        # below needs integers matching whisper-info.py output.
        precision = points
        if str.isalpha(precision[-1]):
            precision = int(precision[:-1]) * seconds_dict[precision[-1]]
        time = int(time) * int(precision)
    return time


def _compare_retention(retention, tmp_path):
    # Get the new retention as total seconds per archive
    new_retention = [_convert_seconds(item) for item in list(retention)]
    info_string = [INFO_BIN, tmp_path]
    cur_ret_list = subprocess.Popen(info_string, stdout=subprocess.PIPE)
    cur_ret_list = cur_ret_list.communicate()[0].split('\n')
    cur_retention = [int(line.split(':')[1])
                     for line in cur_ret_list if 'retention' in line]
    return cur_retention == new_retention


def _find_metrics(path):
    for f in scandir(path):
        if f.is_dir(follow_symlinks=False):
            for sf in _find_metrics(f.path):
                yield sf
        else:
            if not f.is_file(follow_symlinks=False) or \
                    not f.name.endswith('.wsp'):
                continue
            yield f.path


def fix_metric(metric):
    if not SCHEMA_LIST:
        LOG.error("Didn't initialize schemas!")
        return []

    if DEBUG:
        LOG.info("Testing %s for modification" % metric)

    devnull = open(os.devnull, 'w')

    command_string = list(BASE_COMMAND) + [metric]
    retention = DEFAULT_SCHEMA['retentions']
    matching = metric[len(ROOT_PATH):].replace('/', '.')
    for schema, info in SCHEMA_LIST.iteritems():
        if info['match'].search(matching):
            retention = info['retentions']
            break
    command_string.extend(list(retention))

    if DEBUG:
        LOG.info("Created command: %s" % command_string)

    if _compare_retention(retention, metric):
        LOG.debug('%s has the same retention as before!' % metric)
        return [(False, metric)]

    if DRY_RUN:
        res = 0
    else:
        LOG.debug('Retention will be %s' % retention)
        # record file owner/group and perms to set properly after
        # whisper-resize.py is complete
        st = os.stat(metric)
        if DEBUG:
            res = subprocess.check_call(command_string)
        else:
            res = subprocess.check_call(command_string, stdout=devnull)
        os.chmod(metric, st.st_mode)
        os.chown(metric, st.st_uid, st.st_gid)

    devnull.close()

    # wait for a second, so we don't kill I/O on the host
    time.sleep(SLEEP)

    # We have manual commands for every failed file from these errors, so we
    # can just go through each of these errors after a completed run. There
    # shouldn't be many.
    if res != 0:
        LOG.error('Failed to update schemas for %s' % metric)
        LOG.error('Attempted retention: %s' % retention)
        LOG.error('Attempted command string: %s' % command_string)
        return [(False, metric)]
    else:
        return [(True, metric)]


def search_and_fix(subdir):
    if not SCHEMA_LIST:
        LOG.error("Didn't initialize schemas!")
        return

    fpath = os.path.join(ROOT_PATH, subdir)
    pool = Pool(cpu_count())
    LOG.info('Creating new storage schemas for metrics under %s ...' % fpath)
    results = pool.map(fix_metric, _find_metrics(fpath), 100)
    pool.close()
    pool.join()

    return results


# Parse command line options sent to the script
def cli_opts():
    parser = argparse.ArgumentParser(
        "Correct storage settings on multiple whisper files")
    parser.add_argument('--cfg', action='store', dest='cfg',
                        help='The storage-schemas.conf file path',
                        required=True)
    parser.add_argument('--path', action='store', dest='path',
                        help='The root path to find metrics in',
                        required=True)
    parser.add_argument('--debug', action='store_true', dest='debug',
                        help='Display debug information',
                        default=False)
    parser.add_argument('--dry-run', action='store_true', dest='dry_run',
                        help="Don't actually do anything",
                        default=False)
    parser.add_argument('--subdir', action='store', dest='subdir',
                        help="If you only want to process a particular subdir",
                        default='')
    parser.add_argument('--nobackup', action='store_true', dest='nobackup',
                        help="Passed through to whisper-resize.py, "
                             "don't create a backup",
                        default=False)
    parser.add_argument('--aggregate', action='store_true', dest='aggregate',
                        help="Passed through to whisper-resize.py, "
                             "roll up values",
                        default=False)
    parser.add_argument('--bindir', action='store', dest='bindir',
                        help="The root path to whisper-resize.py "
                             "and whisper-info.py",
                        default='/opt/graphite/bin')
    parser.add_argument('--sleep', action='store', type=float, dest='sleep',
                        help="Sleep this amount of time in seconds between "
                             "metric comparisons",
                        default=0.3)
    return parser.parse_args()


if __name__ == '__main__':
    i_args = cli_opts()
    if os.getenv('USER') != 'root':
        print("You must run this script as root!")
        sys.exit(1)

    if i_args.debug:
        LOG.setLevel(logging.DEBUG)
    soh = logging.StreamHandler(sys.stdout)
    LOG.addHandler(soh)

    ROOT_PATH = i_args.path
    DEBUG = i_args.debug
    DRY_RUN = i_args.dry_run
    BINDIR = i_args.bindir
    SLEEP = i_args.sleep
    RESIZE_BIN = BINDIR + "/whisper-resize.py"
    INFO_BIN = BINDIR + "/whisper-info.py"
    BASE_COMMAND = [RESIZE_BIN]

    if i_args.nobackup:
        BASE_COMMAND.append('--nobackup')
    if i_args.aggregate:
        BASE_COMMAND.append('--aggregate')

    config_schemas(i_args.cfg)
    search_and_fix(i_args.subdir)
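For reference, config_schemas() above expects the standard carbon
storage-schemas.conf layout; a small sketch (section names and retention
values are illustrative, not taken from any particular deployment):

```
[default_1min_for_7days]
pattern = .*
retentions = 1m:7d

[collectd]
pattern = ^collectd\.
retentions = 10s:1d,60s:30d
```

Each section needs a 'pattern' regex and a 'retentions' list; the '.*'
pattern replaces the built-in DEFAULT_SCHEMA retentions instead of being
added to SCHEMA_LIST.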
==> whisper-1.1.10/contrib/whisper-auto-resize.py <==
#!/usr/bin/env python

import sys
import os
import fnmatch
import shlex
from subprocess import call
from optparse import OptionParser
from distutils.spawn import find_executable
from os.path import basename
from six.moves import input

# On Debian systems whisper-resize.py is available as whisper-resize
whisperResizeExecutable = find_executable("whisper-resize.py")
if whisperResizeExecutable is None:
    whisperResizeExecutable = find_executable("whisper-resize")
    if whisperResizeExecutable is None:
        # Probably will fail later, set it nevertheless
        whisperResizeExecutable = "whisper-resize.py"

option_parser = OptionParser(
    usage='''%prog storagePath configPath

storagePath   the path to the directory containing whisper files (CAN NOT BE
              A SUBDIR, use --subdir for that)
configPath    the path to your carbon config files
''', version="%prog 0.1")

option_parser.add_option(
    '--doit', default=False, action='store_true',
    help="This is not a drill, lets do it")
option_parser.add_option(
    '-q', '--quiet', default=False, action='store_true',
    help="Run quietly, suppressing informational output")
option_parser.add_option(
    '--subdir', default=None, type='string',
    help="only process a subdir of whisper files")
option_parser.add_option(
    '--carbonlib', default=None, type='string',
    help="folder where the carbon lib files are if it's not in your path already")
option_parser.add_option(
    '--whisperlib', default=None, type='string',
    help="folder where the whisper lib files are if it's not in your path already")
option_parser.add_option(
    '--confirm', default=False, action='store_true',
    help="ask for confirmation prior to resizing a whisper file")
option_parser.add_option(
    '-x', '--extra_args', default='', type='string',
    help="pass any additional arguments to the %s script" %
    basename(whisperResizeExecutable))

(options, args) = option_parser.parse_args()

if len(args) < 2:
    option_parser.print_help()
    sys.exit(1)

storagePath = args[0]
configPath = args[1]

# check to see if we are processing a subfolder
# we need to have a separate config option for this since
# otherwise the metric test thinks the metric is at the root
# of the storage path and can match schemas incorrectly
if options.subdir is None:
    processPath = args[0]
else:
    processPath = options.subdir

# Injecting the Whisper Lib Path if needed
if options.whisperlib is not None:
    sys.path.insert(0, options.whisperlib)

try:
    import whisper
except ImportError:
    raise SystemExit('[ERROR] Can\'t find the whisper module, try using '
                     '--whisperlib to explicitly include the path')

# Injecting the Carbon Lib Path if needed
if options.carbonlib is not None:
    sys.path.insert(0, options.carbonlib)

try:
    from carbon.conf import settings
except ImportError:
    raise SystemExit('[ERROR] Can\'t find the carbon module, try using '
                     '--carbonlib to explicitly include the path')

# carbon.conf not seeing the config files so give it a nudge
settings.CONF_DIR = configPath
settings.LOCAL_DATA_DIR = storagePath

# import these once we have the settings figured out
from carbon.storage import loadStorageSchemas, loadAggregationSchemas

# Load the Defined Schemas from our config files
schemas = loadStorageSchemas()
agg_schemas = loadAggregationSchemas()


# check to see if a metric needs to be resized based on the current config
def processMetric(fullPath, schemas, agg_schemas):
    """
    method to process a given metric, and resize it if necessary

    Parameters:
        fullPath    - full path to the metric whisper file
        schemas     - carbon storage schemas loaded from config
        agg_schemas - carbon storage aggregation schemas loaded from config
    """
    schema_config_args = ''
    schema_file_args = ''
    rebuild = False
    messages = ''

    # get archive info from whisper file
    info = whisper.info(fullPath)

    # get graphite metric name from fullPath
    metric = getMetricFromPath(fullPath)

    # loop the carbon-storage schemas
    for schema in schemas:
        if schema.matches(metric):
            # returns secondsPerPoint and points for this schema in tuple format
            archive_config = [archive.getTuple() for archive in schema.archives]
            break

    # guard against no matching aggregation schema leaving these unbound
    xFilesFactor = None
    aggregationMethod = None

    # loop through the carbon-aggregation schemas
    for agg_schema in agg_schemas:
        if agg_schema.matches(metric):
            xFilesFactor, aggregationMethod = agg_schema.archives
            break

    if xFilesFactor is None:
        xFilesFactor = 0.5
    if aggregationMethod is None:
        aggregationMethod = 'average'

    # loop through the bucket tuples and convert to string format for resizing
    for retention in archive_config:
        current_schema = '%s:%s ' % (retention[0], retention[1])
        schema_config_args += current_schema

    # loop through the current file's bucket sizes and convert to string
    # format to compare for resizing
    for fileRetention in info['archives']:
        current_schema = '%s:%s ' % (fileRetention['secondsPerPoint'],
                                     fileRetention['points'])
        schema_file_args += current_schema
    # check to see if the current and configured schemas are the same or rebuild
    if (schema_config_args != schema_file_args):
        rebuild = True
        messages += 'updating Retentions from: %s to: %s \n' % \
                    (schema_file_args, schema_config_args)

    # only care about the first two decimals in the comparison since there is
    # floaty stuff going on.
    info_xFilesFactor = "{0:.2f}".format(info['xFilesFactor'])
    str_xFilesFactor = "{0:.2f}".format(xFilesFactor)

    # check to see if the current and configured xFilesFactor are the same
    if (str_xFilesFactor != info_xFilesFactor):
        rebuild = True
        messages += '%s xFilesFactor differs real: %s should be: %s \n' % \
                    (metric, info_xFilesFactor, str_xFilesFactor)

    # check to see if the current and configured aggregationMethods are the same
    if (aggregationMethod != info['aggregationMethod']):
        rebuild = True
        messages += '%s aggregation schema differs real: %s should be: %s \n' % \
                    (metric, info['aggregationMethod'], aggregationMethod)

    # if we need to rebuild, lets do it.
    if rebuild is True:
        cmd = [whisperResizeExecutable, fullPath]
        for x in shlex.split(options.extra_args):
            cmd.append(x)
        cmd.append('--xFilesFactor=' + str(xFilesFactor))
        cmd.append('--aggregationMethod=' + str(aggregationMethod))
        for x in shlex.split(schema_config_args):
            cmd.append(x)

        if options.quiet is not True or options.confirm is True:
            print(messages)
            print(cmd)

        if options.confirm is True:
            options.doit = confirm("Would you like to run this command? [y/n]: ")
            if options.doit is False:
                print("Skipping command \n")

        if options.doit is True:
            exitcode = call(cmd)
            # if the command failed lets bail so we can take a look
            # before proceeding
            if (exitcode > 0):
                print('Error running: %s' % (cmd))
                sys.exit(1)


def getMetricFromPath(filePath):
    """
    this method takes the full file path of a whisper file and converts it
    to a graphite metric name

    Parameters:
        filePath - full file path to a whisper file

    Returns a string representing the metric name
    """
    # sanitize directory since we may get a trailing slash or not, and if we
    # don't it creates a leading '.'
    data_dir = os.path.normpath(settings.LOCAL_DATA_DIR) + os.sep

    # pull the data dir off and convert to the graphite metric name
    metric_name = filePath.replace(data_dir, '')
    metric_name = metric_name.replace('.wsp', '')
    metric_name = metric_name.replace('/', '.')

    return metric_name


def confirm(question, error_response='Valid options : yes or no'):
    """
    ask the user if they would like to perform the action

    Parameters:
        question       - the question you would like to ask the user to confirm.
        error_response - the message to display if an invalid option is given.
    """
    while True:
        answer = input(question).lower()
        if answer in ('y', 'yes'):
            return True
        if answer in ('n', 'no'):
            return False
        print(error_response)


if os.path.isfile(processPath) and processPath.endswith('.wsp'):
    processMetric(processPath, schemas, agg_schemas)
else:
    for root, _, files in os.walk(processPath):
        # we only want to deal with non-hidden whisper files
        for f in fnmatch.filter(files, '*.wsp'):
            fullpath = os.path.join(root, f)
            processMetric(fullpath, schemas, agg_schemas)

==> whisper-1.1.10/contrib/whisper-auto-update.py <==
#!/usr/bin/env python

import sys
import time
import signal
import optparse

try:
    import whisper
except ImportError:
    raise SystemExit('[ERROR] Please make sure whisper is installed properly')


# update this callback to do the logic you want.
# a future version could use a config file in which this fn is defined.
def update_value(timestamp, value):
    if value is None:
        return value
    return value * 1024 * 1024 * 1024


# Ignore SIGPIPE
signal.signal(signal.SIGPIPE, signal.SIG_DFL)

now = int(time.time())
yesterday = now - (60 * 60 * 24)

option_parser = optparse.OptionParser(usage='''%prog [options] path''')
option_parser.add_option(
    '--from', default=yesterday, type='int', dest='_from',
    help=("Unix epoch time of the beginning of "
          "your requested interval (default: 24 hours ago)"))
option_parser.add_option(
    '--until', default=now, type='int',
    help="Unix epoch time of the end of your requested interval (default: now)")
option_parser.add_option(
    '--pretty', default=False, action='store_true',
    help="Show human-readable timestamps instead of unix times")

(options, args) = option_parser.parse_args()

if len(args) < 1:
    option_parser.print_usage()
    sys.exit(1)

path = args[0]

from_time = int(options._from)
until_time = int(options.until)

try:
    data = whisper.fetch(path, from_time, until_time)
    if not data:
        raise SystemExit('No data in selected timerange')
    (timeInfo, values_old) = data
except whisper.WhisperException as exc:
    raise SystemExit('[ERROR] %s' % str(exc))

(start, end, step) = timeInfo
t = start
for value_old in values_old:
    value_str_old = str(value_old)
    value_new = update_value(t, value_old)
    value_str_new = str(value_new)
    if options.pretty:
        timestr = time.ctime(t)
    else:
        timestr = str(t)
    print("%s\t%s -> %s" % (timestr, value_str_old, value_str_new))
    try:
        if value_new is not None:
            whisper.update(path, value_new, t)
        t += step
    except whisper.WhisperException as exc:
        raise SystemExit('[ERROR] %s' % str(exc))

==> whisper-1.1.10/setup.cfg <==
[egg_info]
tag_build = 
tag_date = 0

==> whisper-1.1.10/setup.py <==
#!/usr/bin/env python

import os
from glob import glob
from setuptools import setup


def read(fname):
    with open(os.path.join(os.path.dirname(__file__), fname)) as f:
        return f.read()


setup(
    name='whisper',
    version='1.1.10',
    url='http://graphiteapp.org/',
    author='Chris Davis',
    author_email='chrismd@gmail.com',
    license='Apache Software License 2.0',
    description='Fixed size round-robin style database',
    long_description=read('README.md'),
    long_description_content_type='text/markdown',
    py_modules=['whisper'],
    scripts=glob('bin/*') + glob('contrib/*'),
    install_requires=['six'],
    classifiers=[
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy',
    ],
    zip_safe=False
)
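Before the test suite, a minimal round-trip with the library itself may help
orient the reader. This is a sketch only (the file name 'quickstart.wsp' is
hypothetical) using calls the tests below also exercise:

```python
import time
import whisper

# two archives: 1s-per-point for 60 points, 60s-per-point for 60 points
whisper.create('quickstart.wsp', [(1, 60), (60, 60)])

# store a single value at the current timestamp
whisper.update('quickstart.wsp', 42.0, int(time.time()))

# fetch everything back: ((fromInterval, untilInterval, step), values)
(timeinfo, values) = whisper.fetch('quickstart.wsp', 0)
print(timeinfo, values[-5:])
```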
==> whisper-1.1.10/test_whisper.py <==
#!/usr/bin/env python

import os
import sys
import time
import math
import random
import struct
import errno
from datetime import datetime

from six.moves import StringIO
from six import assertRegex

try:
    from unittest.mock import patch, mock_open
except ImportError:
    from mock import patch, mock_open

try:
    import unittest2 as unittest
except ImportError:
    import unittest

# For py3k in TestWhisper.test_merge
try:
    FileNotFoundError  # noqa
except NameError:
    class FileNotFoundError(Exception):
        pass

import whisper


class SimulatedCorruptWhisperFile(object):
    """
    Simple context manager to be used as a decorator for simulating a
    corrupt whisper file for testing purposes.

    Example:

    >>> whisper.create('test.wsp', [(60, 10)])
    >>> with SimulatedCorruptWhisperFile():
    ...     whisper.info('test.wsp')

    When 'corrupt_archive' is passed as True, the metadata will be left
    intact, but the archive will seem corrupted.
    """
    def __init__(self, corrupt_archive=False):
        self.corrupt_archive = corrupt_archive

        self.metadataFormat = whisper.metadataFormat
        self.archiveInfoFormat = whisper.archiveInfoFormat
        self.CACHE_HEADERS = whisper.CACHE_HEADERS

    def __enter__(self):
        # Force the struct unpack to fail by changing the metadata
        # format. This simulates an actual corrupted whisper file
        if not self.corrupt_archive:
            whisper.metadataFormat = '!ssss'
        else:
            whisper.archiveInfoFormat = '!ssss'

        # Force whisper to reread the header instead of returning
        # the previous (correct) header from the header cache
        whisper.CACHE_HEADERS = False

    def __exit__(self, *args, **kwargs):
        whisper.metadataFormat = self.metadataFormat
        whisper.archiveInfoFormat = self.archiveInfoFormat
        whisper.CACHE_HEADERS = self.CACHE_HEADERS


class AssertRaisesException(object):
    """
    Context manager to not only assert the type of exception raised, but
    also that the actual value of the exception matches what is expected

    >>> with AssertRaisesException(ValueError('beer > wine')):
    ...     raise ValueError('beer > wine')

    This is better than unittest.TestCase.assertRaises as it also checks
    the contents of the exception vs just the type raised.
    """
    def __init__(self, exc):
        self.expected_exc = exc

    def __enter__(self):
        yield

    def __exit__(self, e_type, e_value, tracebck):
        # Ensure an exception was actually raised
        if e_type is None:
            raise AssertionError("Exception of type '{}' was not raised".format(
                self.expected_exc.__class__.__name__,
            ))
        elif not isinstance(self.expected_exc, e_type):
            raise AssertionError("Exception type '{}' is not of type '{}'".format(
                getattr(e_type, '__name__', 'None'),
                self.expected_exc.__class__.__name__,
            ))
        # Ensure the actual values are the exact same. Since
        # two instances of an arbitrary exception will never
        # be considered equal, use the __dict__ attr to check
        # that all of the kwargs such as path for exceptions
        # such as CorruptWhisperFile are the exact same.
        elif e_value.__dict__ != self.expected_exc.__dict__:
            raise AssertionError("'{}' != '{}'".format(
                repr(self.expected_exc.__dict__),
                repr(e_value.__dict__),
            ))
        # Some builtin exceptions such as ValueError return {} for
        # ValueError.__dict__, so finally, cast those to strings to compare
        elif str(e_value) != str(self.expected_exc):
            raise AssertionError("String forms of: '{}' != '{}'".format(
                str(self.expected_exc),
                str(e_value),
            ))

        # Context managers need to return True in __exit__ to not
        # re-raise the exception held in the e_value variable
        return True


class WhisperTestBase(unittest.TestCase):
    def setUp(self):
        self.filename = 'db.wsp'
        self.retention = [(1, 60), (60, 60)]

    def tearDown(self):
        self._remove(self.filename)

    @staticmethod
    def _remove(wsp_file):
        try:
            os.unlink(wsp_file)
        except (IOError, OSError, FileNotFoundError):
            pass


class TestWhisper(WhisperTestBase):
    """
    Testing functions for whisper.
""" def test_validate_archive_list(self): """ blank archive config """ with AssertRaisesException( whisper.InvalidConfiguration( 'You must specify at least one archive configuration!')): whisper.validateArchiveList([]) def test_duplicate(self): """ Checking duplicates """ # TODO: Fix the lies with whisper.validateArchiveList() saying it returns True/False self.assertIsNone(whisper.validateArchiveList(self.retention)) with AssertRaisesException( whisper.InvalidConfiguration( 'A Whisper database may not be configured having two ' 'archives with the same precision (archive0: (1, 60), ' 'archive1: (1, 60))')): whisper.validateArchiveList([(1, 60), (60, 60), (1, 60)]) def test_even_precision_division(self): """ even precision division """ whisper.validateArchiveList([(60, 60), (6, 60)]) with AssertRaisesException( whisper.InvalidConfiguration( "Higher precision archives' precision must evenly divide " "all lower precision archives' precision (archive0: 7, " "archive1: 60)")): whisper.validateArchiveList([(60, 60), (7, 60)]) def test_timespan_coverage(self): """ timespan coverage """ whisper.validateArchiveList(self.retention) with AssertRaisesException( whisper.InvalidConfiguration( 'Lower precision archives must cover larger time intervals ' 'than higher precision archives (archive0: 60 seconds, ' 'archive1: 10 seconds)')): whisper.validateArchiveList([(1, 60), (10, 1)]) def test_number_of_points(self): """ number of points """ whisper.validateArchiveList(self.retention) with AssertRaisesException( whisper.InvalidConfiguration( "Each archive must have at least enough points to " "consolidate to the next archive (archive1 consolidates 60 " "of archive0's points but it has only 30 total points)")): whisper.validateArchiveList([(1, 30), (60, 60)]) def test_aggregate(self): """ aggregate functions """ # min of 1-4 self.assertEqual(whisper.aggregate('min', [1, 2, 3, 4]), 1) # max of 1-4 self.assertEqual(whisper.aggregate('max', [1, 2, 3, 4]), 4) # last element in the known values self.assertEqual(whisper.aggregate('last', [3, 2, 5, 4]), 4) # sum ALL THE VALUES! 
        self.assertEqual(whisper.aggregate('sum', [10, 2, 3, 4]), 19)
        # average of the list elements
        self.assertEqual(whisper.aggregate('average', [1, 2, 3, 4]), 2.5)
        avg_zero = [1, 2, 3, 4, None, None, None, None]
        non_null = [i for i in avg_zero if i is not None]
        self.assertEqual(whisper.aggregate('avg_zero', non_null, avg_zero), 1.25)
        # avg_zero without neighborValues
        with self.assertRaises(whisper.InvalidAggregationMethod):
            whisper.aggregate('avg_zero', non_null)
        # absmax with negative max
        self.assertEqual(whisper.aggregate('absmax', [-3, -2, 1, 2]), -3)
        # absmax with positive max
        self.assertEqual(whisper.aggregate('absmax', [-2, -1, 2, 3]), 3)
        # absmin with positive min
        self.assertEqual(whisper.aggregate('absmin', [-3, -2, 1, 2]), 1)
        # absmin with negative min
        self.assertEqual(whisper.aggregate('absmin', [-2, -1, 2, 3]), -1)

        with AssertRaisesException(
                whisper.InvalidAggregationMethod(
                    'Unrecognized aggregation method derp')):
            whisper.aggregate('derp', [12, 2, 3123, 1])

    def _test_create_exception(self, exception_method='write', e=None):
        """
        Behaviour when creating a whisper file on a full filesystem
        """
        m_open = mock_open()
        # Get the mocked file object and override interesting attributes
        m_file = m_open.return_value
        m_file.name = self.filename
        method = getattr(m_file, exception_method)

        if not e:
            e = IOError(errno.ENOSPC, "Mocked IOError")
        method.side_effect = e

        with patch('whisper.open', m_open, create=True):
            with patch('os.unlink') as m_unlink:
                self.assertRaises(e.__class__, whisper.create, self.filename,
                                  self.retention)

        return (m_file, m_unlink)

    def test_create_write_ENOSPC(self):
        """
        Behaviour when creating a whisper file on a full filesystem (write)
        """
        (m_file, m_unlink) = self._test_create_exception('write')
        m_unlink.assert_called_with(self.filename)

    def test_create_close_ENOSPC(self):
        """
        Behaviour when creating a whisper file on a full filesystem (close)
        """
        (m_file, m_unlink) = self._test_create_exception('close')
        m_unlink.assert_called_with(self.filename)

    def test_create_close_EIO(self):
        """
        Behaviour when creating a whisper file and getting an I/O error (EIO)
        """
        (m_file, m_unlink) = self._test_create_exception('close',
                                                         e=IOError(errno.EIO))
        self.assertTrue(m_unlink.called)

    def test_create_close_exception(self):
        """
        Behaviour when creating a whisper file and getting a generic exception
        """
        (m_file, m_unlink) = self._test_create_exception('close',
                                                         e=Exception("boom!"))
        # Must not call os.unlink on exception other than IOError
        self.assertFalse(m_unlink.called)

    def test_create_and_info(self):
        """
        Create a db and use info() to validate
        """
        # check if invalid configuration fails successfully
        for retention in (0, []):
            with AssertRaisesException(
                    whisper.InvalidConfiguration(
                        'You must specify at least one archive configuration!')):
                whisper.create(self.filename, retention)

        # create a new db with a valid configuration
        whisper.create(self.filename, self.retention)

        # Ensure another file can't be created when one exists already
        with AssertRaisesException(
                whisper.InvalidConfiguration(
                    'File {0} already exists!'.format(self.filename))):
            whisper.create(self.filename, self.retention)

        info = whisper.info(self.filename)

        # check header information
        self.assertEqual(info['maxRetention'],
                         max([a[0] * a[1] for a in self.retention]))
        self.assertEqual(info['aggregationMethod'], 'average')
        self.assertEqual(info['xFilesFactor'], 0.5)

        # check archive information
        self.assertEqual(len(info['archives']), len(self.retention))
        self.assertEqual(info['archives'][0]['points'], self.retention[0][1])
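        # each archive header also carries secondsPerPoint and a derived
        # retention equal to secondsPerPoint * points, asserted below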
        self.assertEqual(info['archives'][0]['secondsPerPoint'],
                         self.retention[0][0])
        self.assertEqual(info['archives'][0]['retention'],
                         self.retention[0][0] * self.retention[0][1])
        self.assertEqual(info['archives'][1]['retention'],
                         self.retention[1][0] * self.retention[1][1])

    def test_info_bogus_file(self):
        self.assertIsNone(whisper.info('bogus-file'))

        # Validate "corrupt" whisper metadata
        whisper.create(self.filename, self.retention)
        with SimulatedCorruptWhisperFile():
            with AssertRaisesException(
                    whisper.CorruptWhisperFile(
                        'Unable to read header', self.filename)):
                whisper.info(self.filename)

        # Validate "corrupt" whisper archive data
        with SimulatedCorruptWhisperFile(corrupt_archive=True):
            with AssertRaisesException(
                    whisper.CorruptWhisperFile(
                        'Unable to read archive0 metadata', self.filename)):
                whisper.info(self.filename)

    def test_file_fetch_edge_cases(self):
        """
        Test some of the edge cases in file_fetch() that should return
        None or raise an exception
        """
        whisper.create(self.filename, [(1, 60)])

        with open(self.filename, 'rb') as fh:
            msg = "Invalid time interval: from time '{0}' is after until time '{1}'"
            until_time = 0
            from_time = int(time.time()) + 100

            with AssertRaisesException(
                    whisper.InvalidTimeInterval(msg.format(from_time, until_time))):
                whisper.file_fetch(fh, fromTime=from_time, untilTime=until_time)

            # fromTime > now aka metrics from the future
            self.assertIsNone(
                whisper.file_fetch(fh, fromTime=int(time.time()) + 100,
                                   untilTime=int(time.time()) + 200),
            )

            # untilTime > oldest time stored in the archive
            headers = whisper.info(self.filename)
            the_past = int(time.time()) - headers['maxRetention'] - 200
            self.assertIsNone(
                whisper.file_fetch(fh, fromTime=the_past - 1, untilTime=the_past),
            )

            # untilTime > now, change untilTime to now
            now = int(time.time())
            self.assertEqual(
                whisper.file_fetch(fh, fromTime=now, untilTime=now + 200, now=now),
                ((now + 1, now + 2, 1), [None]),
            )

    def test_merge(self):
        """
        test merging two databases
        """
        testdb = "test-%s" % self.filename

        # Create 2 whisper databases and merge one into the other
        self._update()
        self._update(testdb)

        whisper.merge(self.filename, testdb)

    def test_merge_empty(self):
        """
        test merging from an empty database
        """
        testdb_a = "test-a-%s" % self.filename
        testdb_b = "test-b-%s" % self.filename

        # create two empty databases with same retention
        self.addCleanup(self._remove, testdb_a)
        whisper.create(testdb_a, self.retention)
        self.addCleanup(self._remove, testdb_b)
        whisper.create(testdb_b, self.retention)

        whisper.merge(testdb_a, testdb_b)

    def test_merge_bad_archive_config(self):
        testdb = "test-%s" % self.filename

        # Create 2 whisper databases with different schema
        self._update()
        self.addCleanup(self._remove, testdb)
        whisper.create(testdb, [(100, 1)])
        with AssertRaisesException(
                NotImplementedError(
                    'db.wsp and test-db.wsp archive configurations are '
                    'unalike. Resize the input before merging')):
            whisper.merge(self.filename, testdb)

    def test_diff(self):
        testdb = "test-%s" % self.filename

        now = int(time.time())

        self.addCleanup(self._remove, testdb)
        whisper.create(testdb, self.retention)

        whisper.create(self.filename, self.retention)

        whisper.update(testdb, 1.0, now)
        whisper.update(self.filename, 2.0, now)

        results = whisper.diff(testdb, self.filename)
        expected = [(0, [(int(now), 1.0, 2.0)], 1), (1, [], 0)]

        self.assertEqual(results, expected)

    def test_diff_with_empty(self):
        testdb = "test-%s" % self.filename

        now = time.time()

        self.addCleanup(self._remove, testdb)
        whisper.create(testdb, self.retention)

        whisper.create(self.filename, self.retention)

        whisper.update(testdb, 1.0, now)
        whisper.update(self.filename, 2.0, now)

        # Purposefully insert nulls to strip out
        previous = now - self.retention[0][0]
        whisper.update(testdb, float('NaN'), previous)

        results = whisper.diff(testdb, self.filename, ignore_empty=True)
        self.assertEqual(
            results,
            [(0, [(int(now), 1.0, 2.0)], 1), (1, [], 0)],
        )

        results_empties = whisper.diff(testdb, self.filename, ignore_empty=False)
        expected = [(0, [(int(previous), float('NaN'), None),
                         (int(now), 1.0, 2.0)], 2), (1, [], 0)]

        # Stupidly, float('NaN') != float('NaN'), so assert that the
        # repr() results are the same :/
        #
        # See this thread:
        #    https://mail.python.org/pipermail/python-ideas/2010-March/006945.html
        self.assertEqual(
            repr(results_empties),
            repr(expected),
        )
        # Since the above test is somewhat of a sham, ensure that there
        # is a nan where there should be.
        self.assertTrue(
            math.isnan(results_empties[0][1][0][1])
        )

    def test_file_diff(self):
        testdb = "test-%s" % self.filename

        now = time.time()

        self.addCleanup(self._remove, testdb)
        whisper.create(testdb, self.retention)

        whisper.create(self.filename, self.retention)

        whisper.update(testdb, 1.0, now)
        whisper.update(self.filename, 2.0, now)

        with open(testdb, 'rb') as fh_1:
            with open(self.filename, 'rb+') as fh_2:
                results = whisper.file_diff(fh_1, fh_2)

        expected = [(0, [(int(now), 1.0, 2.0)], 1), (1, [], 0)]
        self.assertEqual(results, expected)

    def test_file_diff_invalid(self):
        testdb = "test-%s" % self.filename

        self.addCleanup(self._remove, testdb)
        whisper.create(testdb, [(120, 10)])

        whisper.create(self.filename, self.retention)

        # Diffing 2 archives with different retentions should fail
        with open(testdb, 'rb') as fh_1:
            with open(self.filename, 'rb+') as fh_2:
                with AssertRaisesException(
                        NotImplementedError(
                            'test-db.wsp and db.wsp archive configurations are '
                            'unalike. Resize the input before diffing')):
                    whisper.file_diff(fh_1, fh_2)

    def test_fetch(self):
        """
        fetch info from database
        """
        # Don't use AssertRaisesException due to a super obscure bug in
        # python2.6 which returns an IOError in the 2nd argument of __exit__
        # in a context manager as a tuple.
        # See this for a minimal reproducer:
        #    http://git.io/cKz30g
        with self.assertRaises(IOError):
            # check a db that doesn't exist
            whisper.fetch("this_db_does_not_exist", 0)

        # SECOND MINUTE HOUR DAY
        retention = [(1, 60), (60, 60), (3600, 24), (86400, 365)]
        whisper.create(self.filename, retention)

        # check a db with an invalid time range
        now = int(time.time())
        past = now - 6000

        msg = "Invalid time interval: from time '{0}' is after until time '{1}'"
        with AssertRaisesException(whisper.InvalidTimeInterval(msg.format(now, past))):
            whisper.fetch(self.filename, now, past)

        fetch = whisper.fetch(self.filename, 0)

        # check time range
        self.assertEqual(fetch[0][1] - fetch[0][0],
                         retention[-1][0] * retention[-1][1])

        # check number of points
        self.assertEqual(len(fetch[1]), retention[-1][1])

        # check step size
        self.assertEqual(fetch[0][2], retention[-1][0])

    def _update(self, wsp=None, schema=None, sparse=False, useFallocate=False):
        wsp = wsp or self.filename
        schema = schema or [(1, 20)]
        num_data_points = 20

        # create sample data
        self.addCleanup(self._remove, wsp)
        whisper.create(wsp, schema, sparse=sparse, useFallocate=useFallocate)
        tn = int(time.time()) - num_data_points
        data = []
        for i in range(num_data_points):
            data.append((tn + 1 + i, random.random() * 10))

        # test single update
        whisper.update(wsp, data[0][1], data[0][0])

        # test multi update
        whisper.update_many(wsp, data[1:])

        return data

    def test_fadvise(self):
        original_fadvise = whisper.FADVISE_RANDOM
        whisper.FADVISE_RANDOM = True

        self._update()

        whisper.FADVISE_RANDOM = original_fadvise

    def test_lock(self):
        original_lock = whisper.LOCK
        whisper.LOCK = True

        self._update()

        whisper.LOCK = original_lock

    def test_autoflush(self):
        original_autoflush = whisper.AUTOFLUSH
        whisper.AUTOFLUSH = True

        self._update()

        whisper.AUTOFLUSH = original_autoflush

    def test_fallocate(self):
        self._update(useFallocate=True)

    def test_sparse(self):
        self._update(sparse=True)

    def test_set_xfilesfactor(self):
        """
        Create a whisper file
        Update xFilesFactor
        Check if update succeeded
        Check if exceptions get raised with wrong input
        """
        whisper.create(self.filename, [(1, 20)])

        target_xff = 0.42
        info0 = whisper.info(self.filename)
        old_xff = whisper.setXFilesFactor(self.filename, target_xff)
        # return value should match old xff
        self.assertEqual(info0['xFilesFactor'], old_xff)
        info1 = whisper.info(self.filename)

        # Other header information should not change
        self.assertEqual(info0['aggregationMethod'],
                         info1['aggregationMethod'])
        self.assertEqual(info0['maxRetention'], info1['maxRetention'])
        self.assertEqual(info0['archives'], info1['archives'])

        # packing and unpacking because
        # AssertionError: 0.20000000298023224 != 0.2
        target_xff = struct.unpack("!f", struct.pack("!f", target_xff))[0]
        self.assertEqual(info1['xFilesFactor'], target_xff)

        with AssertRaisesException(
                whisper.InvalidXFilesFactor('Invalid xFilesFactor zero, not a '
                                            'float')):
            whisper.setXFilesFactor(self.filename, "zero")

        for invalid_xff in -1, 2:
            with AssertRaisesException(
                    whisper.InvalidXFilesFactor('Invalid xFilesFactor %s, not '
                                                'between 0 and 1' %
                                                float(invalid_xff))):
                whisper.setXFilesFactor(self.filename, invalid_xff)

    def test_update_single_archive(self):
        """
        Update with a single leveled archive
        """
        retention_schema = [(1, 20)]
        data = self._update(schema=retention_schema)
        # fetch the data
        fetch = whisper.fetch(self.filename, 0)   # all data
        fetch_data = fetch[1]

        for i, (timestamp, value) in enumerate(data):
            # is value in the fetched data?
            self.assertEqual(value, fetch_data[i])

        # check TimestampNotCovered
        with AssertRaisesException(
                whisper.TimestampNotCovered(
                    'Timestamp not covered by any archives in this database.')):
            # in the future
            whisper.update(self.filename, 1.337, time.time() + 1)

        with AssertRaisesException(
                whisper.TimestampNotCovered(
                    'Timestamp not covered by any archives in this database.')):
            # before the past
            whisper.update(self.filename, 1.337,
                           time.time() - retention_schema[0][1] - 1)

        # When no timestamp is passed in, it should use the current time
        original_lock = whisper.LOCK
        whisper.LOCK = True
        whisper.update(self.filename, 3.7337, None)
        fetched = whisper.fetch(self.filename, 0)[1]
        self.assertEqual(fetched[-1], 3.7337)

        whisper.LOCK = original_lock

    def test_update_many_excess(self):
        # given an empty db
        wsp = "test_update_many_excess.wsp"
        self.addCleanup(self._remove, wsp)
        archive_len = 3
        archive_step = 1
        whisper.create(wsp, [(archive_step, archive_len)])

        # given more points than the db can hold
        excess_len = 1
        num_input_points = archive_len + excess_len
        test_now = int(time.time())
        input_start = test_now - num_input_points + archive_step
        input_points = [(input_start + i, random.random() * 10)
                        for i in range(num_input_points)]

        # when the db is updated with too many points
        whisper.update_many(wsp, input_points, now=test_now)

        # then only the most recent input points (those at the end) were written
        actual_time_info = whisper.fetch(wsp, 0, now=test_now)[0]
        self.assertEqual(actual_time_info,
                         (input_points[-archive_len][0],
                          input_points[-1][0] + archive_step,  # untilInterval = newest + step
                          archive_step))

    def test_debug(self):
        """
        Test creating a file with debug enabled
        Should print debug messages to stdout
        """
        # debug prints to stdout, redirect it to a variable
        old_stdout = sys.stdout
        sys.stdout = StringIO()

        whisper.disableDebug()
        whisper.enableDebug()
        self._update()
        whisper.disableDebug()

        sys.stdout.seek(0)
        out = sys.stdout.read()

        sys.stdout = old_stdout

        assertRegex(self, out, r'(DEBUG :: (WRITE|READ) \d+ bytes #\d+\n)+')

    # TODO: This test method takes more time than virtually every
    #       single other test combined.
    #       Profile this code and potentially fix the underlying reason
    def test_setAggregation(self):
        """
        Create a db, change aggregation, xFilesFactor, then use info() to
        validate
        """
        original_lock = whisper.LOCK
        original_caching = whisper.CACHE_HEADERS
        original_autoflush = whisper.AUTOFLUSH

        whisper.LOCK = True
        whisper.AUTOFLUSH = True
        whisper.CACHE_HEADERS = True
        # create a new db with a valid configuration
        whisper.create(self.filename, self.retention)

        with AssertRaisesException(
                whisper.InvalidAggregationMethod(
                    'Unrecognized aggregation method: yummy beer')):
            whisper.setAggregationMethod(self.filename, 'yummy beer')

        # set every AggregationMethod available
        for ag in whisper.aggregationMethods:
            for xff in 0.0, 0.2, 0.4, 0.7, 0.75, 1.0:
                # original xFilesFactor
                info0 = whisper.info(self.filename)
                # optional xFilesFactor not passed
                old_ag = whisper.setAggregationMethod(self.filename, ag)

                # should return old aggregationmethod
                self.assertEqual(old_ag, info0['aggregationMethod'])

                # original value should not change
                info1 = whisper.info(self.filename)
                self.assertEqual(info0['xFilesFactor'], info1['xFilesFactor'])

                # the selected aggregation method should have applied
                self.assertEqual(ag, info1['aggregationMethod'])

                # optional xFilesFactor used
                old_ag = whisper.setAggregationMethod(self.filename, ag, xff)
                # should return old aggregationmethod
                self.assertEqual(old_ag, info1['aggregationMethod'])
                # new info should match what we just set it to
                info2 = whisper.info(self.filename)
                # packing and unpacking because
                # AssertionError: 0.20000000298023224 != 0.2
                target_xff = struct.unpack("!f", struct.pack("!f", xff))[0]
                self.assertEqual(info2['xFilesFactor'], target_xff)

                # same aggregationMethod assertion again, but double-checking
                # since we are playing with packed values and seek()
                self.assertEqual(ag, info2['aggregationMethod'])

                with SimulatedCorruptWhisperFile():
                    with AssertRaisesException(
                            whisper.CorruptWhisperFile(
                                'Unable to read header', self.filename)):
                        whisper.setAggregationMethod(self.filename, ag)

        whisper.LOCK = original_lock
        whisper.AUTOFLUSH = original_autoflush
        whisper.CACHE_HEADERS = original_caching

    def test_fetch_with_archive_to_select(self):
        """
        fetch info from database providing the archive to select
        """
        # SECOND MINUTE HOUR DAY
        retention = [(1, 60), (60, 60), (3600, 24), (86400, 365)]
        whisper.create(self.filename, retention)

        archives = ["1s", "1m", "1h", "1d"]

        for i in range(len(archives)):
            fetch = whisper.fetch(self.filename, 0, archiveToSelect=archives[i])
            self.assertEqual(fetch[0][2], retention[i][0])

            # check time range
            self.assertEqual(fetch[0][1] - fetch[0][0],
                             retention[-1][0] * retention[-1][1])
        with AssertRaisesException(ValueError("Invalid granularity: 2")):
            fetch = whisper.fetch(self.filename, 0, archiveToSelect="2s")

    def test_resize_with_aggregate(self):
        """resize whisper file with aggregate"""
        # 60s per point, save two days
        retention = [(60, 60 * 24 * 2)]
        whisper.create(self.filename, retention)

        # insert data
        now_timestamp = int((datetime.now() - datetime(1970, 1, 1)).total_seconds())
        now_timestamp -= now_timestamp % 60  # align timestamp to the step
        points = [(now_timestamp - i * 60, i) for i in range(0, 60 * 24 * 2)]
        whisper.update_many(self.filename, points)
        data = whisper.fetch(self.filename,
                             fromTime=now_timestamp - 3600 * 25,
                             untilTime=now_timestamp - 3600 * 25 + 60 * 10)
        self.assertEqual(len(data[1]), 10)
        self.assertEqual(data[0][2], 60)  # high retention == 60
        for d in data[1]:
            self.assertIsNotNone(d)

        # resize from high to low
        os.system('whisper-resize.py %s 60s:1d 300s:2d --aggregate --nobackup >/dev/null' % self.filename)  # noqa
        data_low = whisper.fetch(self.filename,
                                 fromTime=now_timestamp - 3600 * 25,
                                 untilTime=now_timestamp - 3600 * 25 + 60 * 10)
        self.assertEqual(len(data_low[1]), 2)
        self.assertEqual(data_low[0][2], 300)  # low retention == 300
        for d in data_low[1]:
            self.assertIsNotNone(d)
        data_high = whisper.fetch(self.filename,
                                  fromTime=now_timestamp - 60 * 10,
                                  untilTime=now_timestamp)
        self.assertEqual(len(data_high[1]), 10)
        self.assertEqual(data_high[0][2], 60)  # high retention == 60

        # resize from low to high
        os.system('whisper-resize.py %s 60s:2d --aggregate --nobackup >/dev/null' % self.filename)  # noqa
        data1 = whisper.fetch(self.filename,
                              fromTime=now_timestamp - 3600 * 25,
                              untilTime=now_timestamp - 3600 * 25 + 60 * 10)
        self.assertEqual(len(data1[1]), 10)
        # data1 looks like ((1588836720, 1588837320, 60),
        #   [None, None, 1490.0, None, None, None, None, 1485.0, None, None])
        # data1[1] has two non-None values
        self.assertEqual(len(list(filter(lambda x: x is not None, data1[1]))), 2)
        data2 = whisper.fetch(self.filename,
                              fromTime=now_timestamp - 60 * 15,
                              untilTime=now_timestamp - 60 * 5)
        # data2 looks like ((1588925820, 1588926420, 60),
        #   [10.0, 11.0, 10.0, 9.0, 8.0, 5.0, 6.0, 5.0, 4.0, 3.0])
        self.assertEqual(len(list(filter(lambda x: x is not None, data2[1]))), 10)
        # clean up
        self.tearDown()


class TestgetUnitString(unittest.TestCase):
    def test_function(self):
        for unit in ('seconds', 'minutes', 'hours', 'days', 'weeks'):
            value = whisper.getUnitString(unit[0])
            self.assertEqual(value, unit)

    def test_invalid_unit(self):
        with AssertRaisesException(ValueError("Invalid unit 'z'")):
            whisper.getUnitString('z')


# If you send an invalid file, this deadlocks my Fedora 21 / Linux 3.17 laptop
# TODO: Find a way to pass in corrupt whisper files that don't deadlock the
#       testing box
class TestReadHeader(WhisperTestBase):
    def test_normal(self):
        whisper.create(self.filename, [(1, 60), (60, 60)])

        whisper.CACHE_HEADERS = True
        whisper.info(self.filename)
        whisper.info(self.filename)
        whisper.CACHE_HEADERS = False


class TestParseRetentionDef(unittest.TestCase):
    def test_valid_retentions(self):
        retention_map = (
            ('60:10', (60, 10)),
            ('10:60', (10, 60)),
            ('10s:10h', (10, 3600)),
        )
        for retention, expected in retention_map:
            results = whisper.parseRetentionDef(retention)
            self.assertEqual(results, expected)

    def test_invalid_retentions(self):
        retention_map = (
            # From getUnitString
            ('10x:10', ValueError("Invalid unit 'x'")),
            ('60:10x', ValueError("Invalid unit 'x'")),
            # From parseRetentionDef
            ('10', ValueError("Invalid retention definition '10'")),
            ('10X:10', ValueError("Invalid precision specification '10X'")),
            ('10:10$', ValueError("Invalid retention specification '10$'")),
            ('60:10', (60, 10)),
        )
        for retention, expected_exc in retention_map:
            try:
                results = whisper.parseRetentionDef(retention)
            except expected_exc.__class__ as exc:
                self.assertEqual(
                    str(expected_exc),
                    str(exc),
                )
                self.assertEqual(
                    expected_exc.__class__,
                    exc.__class__,
                )
            else:
                # When there isn't an exception raised
                self.assertEqual(results, expected_exc)


class TestCorruptWhisperFile(unittest.TestCase):
    def setUp(self):
        self.path = '/opt/graphite/storage/whisper/moolah.wsp'
        self.error = 'What is the average velocity of an unladen swallow?'
    def test_error(self):
        try:
            raise whisper.CorruptWhisperFile(self.error, self.path)
        except whisper.CorruptWhisperFile as exc:
            self.assertEqual(exc.error, self.error)

    def test_path(self):
        try:
            raise whisper.CorruptWhisperFile(self.error, self.path)
        except whisper.CorruptWhisperFile as exc:
            self.assertEqual(exc.path, self.path)

    def test_repr(self):
        try:
            raise whisper.CorruptWhisperFile(self.error, self.path)
        except whisper.CorruptWhisperFile as exc:
            self.assertEqual(
                repr(exc),
                '<CorruptWhisperFile[%s] %s>' % (self.path, self.error),
            )

    def test_str(self):
        try:
            raise whisper.CorruptWhisperFile(self.error, self.path)
        except whisper.CorruptWhisperFile as exc:
            self.assertEqual(
                str(exc),
                "{0} ({1})".format(self.error, self.path)
            )


if __name__ == '__main__':
    unittest.main()

==> whisper-1.1.10/whisper.egg-info/PKG-INFO <==
Metadata-Version: 2.1
Name: whisper
Version: 1.1.10
Summary: Fixed size round-robin style database
Home-page: http://graphiteapp.org/
Author: Chris Davis
Author-email: chrismd@gmail.com
License: Apache Software License 2.0
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Description-Content-Type: text/markdown
License-File: LICENSE

# Whisper

[![Codacy Badge](https://api.codacy.com/project/badge/Grade/f00d0b65802742e29de56f3744503ab0)](https://www.codacy.com/app/graphite-project/whisper?utm_source=github.com&utm_medium=referral&utm_content=graphite-project/whisper&utm_campaign=badger)
[![Build Status](https://secure.travis-ci.org/graphite-project/whisper.png)](http://travis-ci.org/graphite-project/whisper)
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bhttps%3A%2F%2Fgithub.com%2Fgraphite-project%2Fwhisper.svg?type=shield)](https://app.fossa.io/projects/git%2Bhttps%3A%2F%2Fgithub.com%2Fgraphite-project%2Fwhisper?ref=badge_shield)

## Overview

Whisper is one of three components within the Graphite project:

1. [Graphite-Web](https://github.com/graphite-project/graphite-web), a Django-based web application that renders graphs and dashboards
2. The [Carbon](https://github.com/graphite-project/carbon) metric processing daemons
3. The Whisper time-series database library

![Graphite Components](https://github.com/graphite-project/graphite-web/raw/master/webapp/content/img/overview.png "Graphite Components")

Whisper is a fixed-size database, similar in design and purpose to RRD (round-robin-database). It provides fast, reliable storage of numeric data over time. Whisper allows for higher resolution (seconds per point) of recent data to degrade into lower resolutions for long-term retention of historical data.

## Installation, Configuration and Usage

Please refer to the instructions at [readthedocs](http://graphite.readthedocs.org/).

## Whisper Scripts

rrd2whisper.py
--------------
Convert a rrd file into a whisper (.wsp) file.
```
Usage: rrd2whisper.py rrd_path

Options:
  -h, --help            show this help message and exit
  --xFilesFactor=XFILESFACTOR
                        The xFilesFactor to use in the output file. Defaults
                        to the input RRD's xFilesFactor
  --aggregationMethod=AGGREGATIONMETHOD
                        The consolidation function to fetch from on input and
                        aggregationMethod to set on output. One of: average,
                        last, max, min, avg_zero, absmax, absmin
  --destinationPath=DESTINATIONPATH
                        Path to place created whisper file. Defaults to the
                        RRD file's source path.
```

whisper-create.py
-----------------
Create a new whisper database file.

```
Usage: whisper-create.py path timePerPoint:timeToStore [timePerPoint:timeToStore]*
       whisper-create.py --estimate timePerPoint:timeToStore [timePerPoint:timeToStore]*

timePerPoint and timeToStore specify lengths of time, for example:

60:1440      60 seconds per datapoint, 1440 datapoints = 1 day of retention
15m:8        15 minutes per datapoint, 8 datapoints = 2 hours of retention
1h:7d        1 hour per datapoint, 7 days of retention
12h:2y       12 hours per datapoint, 2 years of retention

Options:
  -h, --help            show this help message and exit
  --xFilesFactor=XFILESFACTOR
  --aggregationMethod=AGGREGATIONMETHOD
                        Function to use when aggregating values (average,
                        sum, last, max, min, avg_zero, absmax, absmin)
  --overwrite
  --estimate            Don't create a whisper file, estimate storage
                        requirements based on archive definitions
```

whisper-dump.py
---------------
Dump the whole whisper file content to stdout.

```
Usage: whisper-dump.py path

Options:
  -h, --help            show this help message and exit
  --pretty              Show human-readable timestamps instead of unix times
  -t TIME_FORMAT, --time-format=TIME_FORMAT
                        Time format to use with --pretty; see time.strftime()
  -r, --raw             Dump value only in the same format for whisper-update
                        (UTC timestamps)
```

whisper-fetch.py
----------------
Fetch all the metrics stored in a whisper file to stdout.

```
Usage: whisper-fetch.py [options] path

Options:
  -h, --help            show this help message and exit
  --from=_FROM          Unix epoch time of the beginning of your requested
                        interval (default: 24 hours ago)
  --until=UNTIL         Unix epoch time of the end of your requested interval
                        (default: now)
  --json                Output results in JSON form
  --pretty              Show human-readable timestamps instead of unix times
  -t TIME_FORMAT, --time-format=TIME_FORMAT
                        Time format to use with --pretty; see time.strftime()
  --drop=DROP           Specify 'nulls' to drop all null values. Specify
                        'zeroes' to drop all zero values. Specify 'empty' to
                        drop both null and zero values.
```

whisper-info.py
---------------
Dump the metadata about a whisper file to stdout.

```
Usage: whisper-info.py [options] path [field]

Options:
  -h, --help  show this help message and exit
  --json      Output results in JSON form
```

whisper-merge.py
----------------
Join two existing whisper files together.

```
Usage: whisper-merge.py [options] from_path to_path

Options:
  -h, --help  show this help message and exit
```

whisper-fill.py
----------------
Copies data from src into dst where it is missing. Unlike whisper-merge, it
doesn't overwrite data that's already present in the target file; it only adds
the missing data (i.e. fills the gaps in the target file). Because no values
are overwritten, no data or precision gets lost. Also, unlike whisper-merge,
it tries to take the highest-precision archive to provide the data, instead
of the one with the largest retention.

```
Usage: whisper-fill.py [options] src_path dst_path

Options:
  -h, --help  show this help message and exit
```

whisper-resize.py
-----------------
Change the retention rates of an existing whisper file.

```
Usage: whisper-resize.py path timePerPoint:timeToStore [timePerPoint:timeToStore]*

timePerPoint and timeToStore specify lengths of time, for example:

60:1440      60 seconds per datapoint, 1440 datapoints = 1 day of retention
15m:8        15 minutes per datapoint, 8 datapoints = 2 hours of retention
1h:7d        1 hour per datapoint, 7 days of retention
12h:2y       12 hours per datapoint, 2 years of retention

Options:
  -h, --help            show this help message and exit
  --xFilesFactor=XFILESFACTOR
                        Change the xFilesFactor
  --aggregationMethod=AGGREGATIONMETHOD
                        Change the aggregation function (average, sum, last,
                        max, min, avg_zero, absmax, absmin)
  --force               Perform a destructive change
  --newfile=NEWFILE     Create a new database file without removing the
                        existing one
  --nobackup            Delete the .bak file after successful execution
  --aggregate           Try to aggregate the values to fit the new archive
                        better. Note that this will make things slower and
                        use more memory.
```

whisper-set-aggregation-method.py
---------------------------------
Change the aggregation method of an existing whisper file.

```
Usage: whisper-set-aggregation-method.py path

Options:
  -h, --help  show this help message and exit
```

whisper-update.py
-----------------
Update a whisper file with one or more values; each value must be paired with
a timestamp.

```
Usage: whisper-update.py [options] path timestamp:value [timestamp:value]*

Options:
  -h, --help  show this help message and exit
```

whisper-diff.py
---------------
Check the differences between whisper files. Useful as a sanity check before
merging.

```
Usage: whisper-diff.py [options] path_a path_b

Options:
  -h, --help       show this help message and exit
  --summary        show summary of differences
  --ignore-empty   skip comparison if either value is undefined
  --columns        print output in simple columns
  --no-headers     do not print column headers
  --until=UNTIL    Unix epoch time of the end of your requested interval
                   (default: now)
  --json           Output results in JSON form
```

## License

Whisper is licensed under version 2.0 of the Apache License. See the
[LICENSE](https://github.com/graphite-project/carbon/blob/master/LICENSE) file
for details.
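
## Library usage example

The scripts above are thin wrappers around the functions defined in `whisper.py` (included below). As orientation, here is a minimal sketch of driving the library directly; it uses only `parseRetentionDef`, `create`, `update` and `fetch` as defined in the module. The file path and retention strings are illustrative, not part of the original documentation.

```
import time
import whisper

path = 'example.wsp'  # illustrative path

# 60s per point for 1 day, then 1h per point for 7 days; retention strings
# use the same timePerPoint:timeToStore syntax the scripts accept.
archives = [whisper.parseRetentionDef(r) for r in ('60:1d', '1h:7d')]
whisper.create(path, archives, xFilesFactor=0.5, aggregationMethod='average')

now = int(time.time())
whisper.update(path, 42.0, timestamp=now)

# fetch() returns ((fromTime, untilTime, step), values); here the one-hour
# window is covered by the 60-second archive, so step is 60.
(timeInfo, values) = whisper.fetch(path, fromTime=now - 3600)
(start, end, step) = timeInfo
print(step, values[-5:])
```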

whisper-1.1.10/whisper.egg-info/SOURCES.txt

LICENSE
README.md
setup.py
test_whisper.py
whisper.py
bin/find-corrupt-whisper-files.py
bin/rrd2whisper.py
bin/whisper-create.py
bin/whisper-diff.py
bin/whisper-dump.py
bin/whisper-fetch.py
bin/whisper-fill.py
bin/whisper-info.py
bin/whisper-merge.py
bin/whisper-resize.py
bin/whisper-set-aggregation-method.py
bin/whisper-set-xfilesfactor.py
bin/whisper-update.py
contrib/update-storage-times.py
contrib/whisper-auto-resize.py
contrib/whisper-auto-update.py
whisper.egg-info/PKG-INFO
whisper.egg-info/SOURCES.txt
whisper.egg-info/dependency_links.txt
whisper.egg-info/not-zip-safe
whisper.egg-info/requires.txt
whisper.egg-info/top_level.txt

whisper-1.1.10/whisper.egg-info/requires.txt

six

whisper-1.1.10/whisper.egg-info/top_level.txt

whisper

whisper-1.1.10/whisper.py

# Copyright 2009-Present The Graphite Development Team
# Copyright 2008 Orbitz WorldWide
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# This module is an implementation of the Whisper database API
# Here is the basic layout of a whisper data file
#
# File = Header,Data
#   Header = Metadata,ArchiveInfo+
#     Metadata = aggregationType,maxRetention,xFilesFactor,archiveCount
#     ArchiveInfo = Offset,SecondsPerPoint,Points
#   Data = Archive+
#     Archive = Point+
#       Point = timestamp,value

import itertools
import operator
import os
import platform
import re
import struct
import sys
import time

izip = getattr(itertools, 'izip', zip)
ifilter = getattr(itertools, 'ifilter', filter)

if sys.version_info >= (3, 0):
  xrange = range

try:
  import fcntl
  CAN_LOCK = True
except ImportError:
  CAN_LOCK = False

try:
  import ctypes
  import ctypes.util
  CAN_FALLOCATE = True
except ImportError:
  CAN_FALLOCATE = False

try:
  if sys.version_info >= (3, 0):
    from os import posix_fadvise, POSIX_FADV_RANDOM
  else:
    from fadvise import posix_fadvise, POSIX_FADV_RANDOM
  CAN_FADVISE = True
except ImportError:
  CAN_FADVISE = False

fallocate = None

if CAN_FALLOCATE:
  libc_name = ctypes.util.find_library('c')
  libc = ctypes.CDLL(libc_name)
  c_off64_t = ctypes.c_int64
  c_off_t = ctypes.c_int

  if platform.uname()[0] == 'FreeBSD':
    # offset type is 64-bit on FreeBSD 32-bit & 64-bit platforms to address files more than 2GB
    c_off_t = ctypes.c_int64

  try:
    _fallocate = libc.posix_fallocate64
    _fallocate.restype = ctypes.c_int
    _fallocate.argtypes = [ctypes.c_int, c_off64_t, c_off64_t]
  except AttributeError:
    try:
      _fallocate = libc.posix_fallocate
      _fallocate.restype = ctypes.c_int
      _fallocate.argtypes = [ctypes.c_int, c_off_t, c_off_t]
    except AttributeError:
      CAN_FALLOCATE = False

  if CAN_FALLOCATE:
    def _py_fallocate(fd, offset, len_):
      res = _fallocate(fd.fileno(), offset, len_)
      if res != 0:
        raise IOError(res, 'fallocate')
    fallocate = _py_fallocate
  del libc
  del libc_name

LOCK = False
CACHE_HEADERS = False
AUTOFLUSH = False
FADVISE_RANDOM = False
# Buffering setting applied to all operations that do *not* require
# a full scan of the file in order to minimize cache thrashing.
BUFFERING = 0

__headerCache = {}

longFormat = "!L"
longSize = struct.calcsize(longFormat)
floatFormat = "!f"
floatSize = struct.calcsize(floatFormat)
valueFormat = "!d"
valueSize = struct.calcsize(valueFormat)
pointFormat = "!Ld"
pointSize = struct.calcsize(pointFormat)
metadataFormat = "!2LfL"
metadataSize = struct.calcsize(metadataFormat)
archiveInfoFormat = "!3L"
archiveInfoSize = struct.calcsize(archiveInfoFormat)

aggregationTypeToMethod = dict({
  1: 'average',
  2: 'sum',
  3: 'last',
  4: 'max',
  5: 'min',
  6: 'avg_zero',
  7: 'absmax',
  8: 'absmin'
})
aggregationMethodToType = dict([[v, k] for k, v in aggregationTypeToMethod.items()])
aggregationMethods = aggregationTypeToMethod.values()

debug = startBlock = endBlock = lambda *a, **k: None

UnitMultipliers = {
  'seconds': 1,
  'minutes': 60,
  'hours': 3600,
  'days': 86400,
  'weeks': 86400 * 7,
  'years': 86400 * 365
}


def getUnitString(s):
  for value in ('seconds', 'minutes', 'hours', 'days', 'weeks', 'years'):
    if value.startswith(s):
      return value
  raise ValueError("Invalid unit '%s'" % s)


def parseRetentionDef(retentionDef):
  try:
    (precision, points) = retentionDef.strip().split(':', 1)
  except ValueError:
    raise ValueError("Invalid retention definition '%s'" % retentionDef)

  if precision.isdigit():
    precision = int(precision) * UnitMultipliers[getUnitString('s')]
  else:
    precision_re = re.compile(r'^(\d+)([a-z]+)$')
    match = precision_re.match(precision)
    if match:
      precision = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))]
    else:
      raise ValueError("Invalid precision specification '%s'" % precision)

  if points.isdigit():
    points = int(points)
  else:
    points_re = re.compile(r'^(\d+)([a-z]+)$')
    match = points_re.match(points)
    if match:
      points = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))] // precision
    else:
      raise ValueError("Invalid retention specification '%s'" % points)

  return (precision, points)


class WhisperException(Exception):
  """Base class for whisper exceptions."""


class InvalidConfiguration(WhisperException):
  """Invalid configuration."""


class InvalidAggregationMethod(WhisperException):
  """Invalid aggregation method."""


class InvalidTimeInterval(WhisperException):
  """Invalid time interval."""


class InvalidXFilesFactor(WhisperException):
  """Invalid xFilesFactor."""


class TimestampNotCovered(WhisperException):
  """Timestamp not covered by any archives in this database."""


class CorruptWhisperFile(WhisperException):
  def __init__(self, error, path):
    Exception.__init__(self, error)
    self.error = error
    self.path = path

  def __repr__(self):
    return "<CorruptWhisperFile[%s] %s>" % (self.path, self.error)

  def __str__(self):
    return "%s (%s)" % (self.error, self.path)


def disableDebug():
  """ Disable writing IO statistics to stdout """
  global open
  try:
    open = _open
  except NameError:
    pass


def enableDebug():
  """ Enable writing IO statistics to stdout """
  global open, _open, debug, startBlock, endBlock
  _open = open

  class open(object):
    def __init__(self, *args, **kwargs):
      self.f = _open(*args, **kwargs)
      self.writeCount = 0
      self.readCount = 0

    def __enter__(self):
      return self

    def __exit__(self, *args):
      self.f.close()

    def write(self, data):
      self.writeCount += 1
      debug('WRITE %d bytes #%d' % (len(data), self.writeCount))
      return self.f.write(data)

    def read(self, size):
      self.readCount += 1
      debug('READ %d bytes #%d' % (size, self.readCount))
      return self.f.read(size)

    def __getattr__(self, attr):
      return getattr(self.f, attr)

  def debug(message):
    print('DEBUG :: %s' % message)

  __timingBlocks = {}

  def startBlock(name):
    __timingBlocks[name] = time.time()

  def endBlock(name):
    debug("%s took %.5f seconds" % (name, time.time() - __timingBlocks.pop(name)))


def __readHeader(fh):
  if CACHE_HEADERS:
    info = __headerCache.get(fh.name)
    if info:
      return info

  originalOffset = fh.tell()
  fh.seek(0)
  packedMetadata = fh.read(metadataSize)

  try:
    (aggregationType, maxRetention, xff, archiveCount) \
        = struct.unpack(metadataFormat, packedMetadata)
  except (struct.error, ValueError, TypeError):
    raise CorruptWhisperFile("Unable to read header", fh.name)

  try:
    aggregationTypeToMethod[aggregationType]
  except KeyError:
    raise CorruptWhisperFile("Unable to read header", fh.name)

  if not 0 <= xff <= 1:
    raise CorruptWhisperFile("Unable to read header", fh.name)

  archives = []

  for i in xrange(archiveCount):
    packedArchiveInfo = fh.read(archiveInfoSize)
    try:
      (offset, secondsPerPoint, points) = struct.unpack(archiveInfoFormat, packedArchiveInfo)
    except (struct.error, ValueError, TypeError):
      raise CorruptWhisperFile("Unable to read archive%d metadata" % i, fh.name)

    archiveInfo = {
      'offset': offset,
      'secondsPerPoint': secondsPerPoint,
      'points': points,
      'retention': secondsPerPoint * points,
      'size': points * pointSize,
    }
    archives.append(archiveInfo)

  fh.seek(originalOffset)
  info = {
    'aggregationMethod': aggregationTypeToMethod.get(aggregationType, 'average'),
    'maxRetention': maxRetention,
    'xFilesFactor': xff,
    'archives': archives,
  }
  if CACHE_HEADERS:
    __headerCache[fh.name] = info

  return info


def setXFilesFactor(path, xFilesFactor):
  """Sets the xFilesFactor for file in path

  path is a string pointing to a whisper file
  xFilesFactor is a float between 0 and 1

  returns the old xFilesFactor
  """
  (_, old_xff) = __setAggregation(path, xFilesFactor=xFilesFactor)

  return old_xff


def setAggregationMethod(path, aggregationMethod, xFilesFactor=None):
  """Sets the aggregationMethod for file in path

  path is a string pointing to the whisper file
  aggregationMethod specifies the method to use when propagating data
  (see ``whisper.aggregationMethods``)
  xFilesFactor specifies the fraction of data points in a propagation interval
  that must have known values for a propagation to occur.
  If None, the existing xFilesFactor in path will not be changed

  returns the old aggregationMethod
  """
  (old_agm, _) = __setAggregation(path, aggregationMethod, xFilesFactor)

  return old_agm


def __setAggregation(path, aggregationMethod=None, xFilesFactor=None):
  """ Set aggregationMethod and/or xFilesFactor for file in path"""

  with open(path, 'r+b', BUFFERING) as fh:
    if LOCK:
      fcntl.flock(fh.fileno(), fcntl.LOCK_EX)

    info = __readHeader(fh)

    if xFilesFactor is None:
      xFilesFactor = info['xFilesFactor']

    if aggregationMethod is None:
      aggregationMethod = info['aggregationMethod']

    __writeHeaderMetadata(fh, aggregationMethod, info['maxRetention'],
                          xFilesFactor, len(info['archives']))

    if AUTOFLUSH:
      fh.flush()
      os.fsync(fh.fileno())

    if CACHE_HEADERS and fh.name in __headerCache:
      del __headerCache[fh.name]

  return (info['aggregationMethod'], info['xFilesFactor'])


def __writeHeaderMetadata(fh, aggregationMethod, maxRetention, xFilesFactor, archiveCount):
  """ Writes header metadata to fh """

  try:
    aggregationType = aggregationMethodToType[aggregationMethod]
  except KeyError:
    raise InvalidAggregationMethod("Unrecognized aggregation method: %s" %
                                   aggregationMethod)

  try:
    xFilesFactor = float(xFilesFactor)
  except ValueError:
    raise InvalidXFilesFactor("Invalid xFilesFactor %s, not a float" %
                              xFilesFactor)

  if xFilesFactor < 0 or xFilesFactor > 1:
    raise InvalidXFilesFactor("Invalid xFilesFactor %s, not between 0 and 1" %
                              xFilesFactor)

  aggregationType = struct.pack(longFormat, aggregationType)
  maxRetention = struct.pack(longFormat, maxRetention)
  xFilesFactor = struct.pack(floatFormat, xFilesFactor)
  archiveCount = struct.pack(longFormat, archiveCount)

  packedMetadata = aggregationType + maxRetention + xFilesFactor + archiveCount

  fh.seek(0)
  fh.write(packedMetadata)


def validateArchiveList(archiveList):
  """ Validates an archiveList.

  An ArchiveList must:
  1. Have at least one archive config. Example: (60, 86400)
  2. No archive may be a duplicate of another.
  3. Higher precision archives' precision must evenly divide all lower
     precision archives' precision.
  4. Lower precision archives must cover larger time intervals than higher
     precision archives.
  5. Each archive must have at least enough points to consolidate to the
     next archive

  Returns True or False
  """

  if not archiveList:
    raise InvalidConfiguration("You must specify at least one archive configuration!")

  archiveList.sort(key=lambda a: a[0])  # Sort by precision (secondsPerPoint)

  for i, archive in enumerate(archiveList):
    if i == len(archiveList) - 1:
      break

    nextArchive = archiveList[i + 1]
    if not archive[0] < nextArchive[0]:
      raise InvalidConfiguration(
        "A Whisper database may not be configured having "
        "two archives with the same precision (archive%d: %s, archive%d: %s)" %
        (i, archive, i + 1, nextArchive))

    if nextArchive[0] % archive[0] != 0:
      raise InvalidConfiguration(
        "Higher precision archives' precision "
        "must evenly divide all lower precision archives' precision "
        "(archive%d: %s, archive%d: %s)" %
        (i, archive[0], i + 1, nextArchive[0]))

    retention = archive[0] * archive[1]
    nextRetention = nextArchive[0] * nextArchive[1]

    if not nextRetention > retention:
      raise InvalidConfiguration(
        "Lower precision archives must cover "
        "larger time intervals than higher precision archives "
        "(archive%d: %s seconds, archive%d: %s seconds)" %
        (i, retention, i + 1, nextRetention))

    archivePoints = archive[1]
    pointsPerConsolidation = nextArchive[0] // archive[0]
    if not archivePoints >= pointsPerConsolidation:
      raise InvalidConfiguration(
        "Each archive must have at least enough points "
        "to consolidate to the next archive (archive%d consolidates %d of "
        "archive%d's points but it has only %d total points)" %
        (i + 1, pointsPerConsolidation, i, archivePoints))


def create(path, archiveList, xFilesFactor=None, aggregationMethod=None,
           sparse=False, useFallocate=False):
  """create(path,archiveList,xFilesFactor=0.5,aggregationMethod='average')

  path is a string
  archiveList is a list of archives, each of which is of the form
  (secondsPerPoint, numberOfPoints)
  xFilesFactor specifies the fraction of data points in a propagation interval
  that must have known values for a propagation to occur
  aggregationMethod specifies the function to use when propagating data
  (see ``whisper.aggregationMethods``)
  """
  # Set default params
  if xFilesFactor is None:
    xFilesFactor = 0.5
  if aggregationMethod is None:
    aggregationMethod = 'average'

  # Validate archive configurations...
  validateArchiveList(archiveList)

  # Looks good, now we create the file and write the header
  if os.path.exists(path):
    raise InvalidConfiguration("File %s already exists!" % path)

  with open(path, 'wb', BUFFERING) as fh:
    try:
      if LOCK:
        fcntl.flock(fh.fileno(), fcntl.LOCK_EX)
      if CAN_FADVISE and FADVISE_RANDOM:
        posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)

      oldest = max([secondsPerPoint * points for secondsPerPoint, points in archiveList])

      __writeHeaderMetadata(fh, aggregationMethod, oldest, xFilesFactor,
                            len(archiveList))

      headerSize = metadataSize + (archiveInfoSize * len(archiveList))
      archiveOffsetPointer = headerSize

      for secondsPerPoint, points in archiveList:
        archiveInfo = struct.pack(archiveInfoFormat, archiveOffsetPointer, secondsPerPoint, points)
        fh.write(archiveInfo)
        archiveOffsetPointer += (points * pointSize)

      # If configured to use fallocate and capable of fallocate use that, else
      # attempt sparse if configured, or zero pre-allocate if sparse isn't configured.
      if CAN_FALLOCATE and useFallocate:
        remaining = archiveOffsetPointer - headerSize
        fallocate(fh, headerSize, remaining)
      elif sparse:
        fh.seek(archiveOffsetPointer - 1)
        fh.write(b'\x00')
      else:
        remaining = archiveOffsetPointer - headerSize
        chunksize = 16384
        zeroes = b'\x00' * chunksize
        while remaining > chunksize:
          fh.write(zeroes)
          remaining -= chunksize
        fh.write(zeroes[:remaining])

      if AUTOFLUSH:
        fh.flush()
        os.fsync(fh.fileno())
      # Explicitly close the file to catch IOError on close()
      fh.close()
    except IOError:
      # if we got an IOError above, the file is either empty or half created.
      # Better off deleting it to avoid surprises later
      os.unlink(fh.name)
      raise


def aggregate(aggregationMethod, knownValues, neighborValues=None):
  if aggregationMethod == 'average':
    return float(sum(knownValues)) / float(len(knownValues))
  elif aggregationMethod == 'sum':
    return float(sum(knownValues))
  elif aggregationMethod == 'last':
    return knownValues[-1]
  elif aggregationMethod == 'max':
    return max(knownValues)
  elif aggregationMethod == 'min':
    return min(knownValues)
  elif aggregationMethod == 'avg_zero':
    if not neighborValues:
      raise InvalidAggregationMethod("Using avg_zero without neighborValues")
    values = [x or 0 for x in neighborValues]
    return float(sum(values)) / float(len(values))
  elif aggregationMethod == 'absmax':
    return max(knownValues, key=abs)
  elif aggregationMethod == 'absmin':
    return min(knownValues, key=abs)
  else:
    raise InvalidAggregationMethod(
      "Unrecognized aggregation method %s" % aggregationMethod)


def __propagate(fh, header, timestamp, higher, lower):
  aggregationMethod = header['aggregationMethod']
  xff = header['xFilesFactor']

  lowerIntervalStart = timestamp - (timestamp % lower['secondsPerPoint'])

  fh.seek(higher['offset'])
  packedPoint = fh.read(pointSize)
  try:
    (higherBaseInterval, higherBaseValue) = struct.unpack(pointFormat, packedPoint)
  except struct.error:
    raise CorruptWhisperFile("Unable to read base datapoint", fh.name)

  if higherBaseInterval == 0:
    higherFirstOffset = higher['offset']
  else:
    timeDistance = lowerIntervalStart - higherBaseInterval
    pointDistance = timeDistance // higher['secondsPerPoint']
    byteDistance = pointDistance * pointSize
    higherFirstOffset = higher['offset'] + (byteDistance % higher['size'])

  higherPoints = lower['secondsPerPoint'] // higher['secondsPerPoint']
  higherSize = higherPoints * pointSize
  relativeFirstOffset = higherFirstOffset - higher['offset']
  relativeLastOffset = (relativeFirstOffset + higherSize) % higher['size']
  higherLastOffset = relativeLastOffset + higher['offset']
  fh.seek(higherFirstOffset)

  if higherFirstOffset < higherLastOffset:  # We don't wrap the archive
    seriesString = fh.read(higherLastOffset - higherFirstOffset)
  else:  # We do wrap the archive
    higherEnd = higher['offset'] + higher['size']
    seriesString = fh.read(higherEnd - higherFirstOffset)
    fh.seek(higher['offset'])
    seriesString += fh.read(higherLastOffset - higher['offset'])

  # Now we unpack the series data we just read
  byteOrder, pointTypes = pointFormat[0], pointFormat[1:]
  points = len(seriesString) // pointSize
  seriesFormat = byteOrder + (pointTypes * points)
  try:
    unpackedSeries = struct.unpack(seriesFormat, seriesString)
  except struct.error:
    raise CorruptWhisperFile("Unable to read datapoints", fh.name)

  # And finally we construct a list of values
  neighborValues = [None] * points
  currentInterval = lowerIntervalStart
  step = higher['secondsPerPoint']

  for i in xrange(0, len(unpackedSeries), 2):
    pointTime = unpackedSeries[i]
    if pointTime == currentInterval:
      neighborValues[i // 2] = unpackedSeries[i + 1]
    currentInterval += step

  # Aggregate the neighborValues and propagate if we have enough known points
  knownValues = [v for v in neighborValues if v is not None]
  if not knownValues:
    return False

  knownPercent = float(len(knownValues)) / float(len(neighborValues))
  if knownPercent >= xff:
    # We have enough data to propagate a value!
    aggregateValue = aggregate(aggregationMethod, knownValues, neighborValues)
    myPackedPoint = struct.pack(pointFormat, lowerIntervalStart, aggregateValue)
    fh.seek(lower['offset'])
    packedPoint = fh.read(pointSize)
    try:
      (lowerBaseInterval, lowerBaseValue) = struct.unpack(pointFormat, packedPoint)
    except struct.error:
      raise CorruptWhisperFile("Unable to read base datapoint", fh.name)

    if lowerBaseInterval == 0:
      # First propagated update to this lower archive
      fh.seek(lower['offset'])
      fh.write(myPackedPoint)
    else:
      # Not our first propagated update to this lower archive
      timeDistance = lowerIntervalStart - lowerBaseInterval
      pointDistance = timeDistance // lower['secondsPerPoint']
      byteDistance = pointDistance * pointSize
      lowerOffset = lower['offset'] + (byteDistance % lower['size'])
      fh.seek(lowerOffset)
      fh.write(myPackedPoint)

    return True
  else:
    return False


def update(path, value, timestamp=None, now=None):
  """
  update(path, value, timestamp=None)

  path is a string
  value is a float
  timestamp is either an int or float
  """
  value = float(value)
  with open(path, 'r+b', BUFFERING) as fh:
    if CAN_FADVISE and FADVISE_RANDOM:
      posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
    return file_update(fh, value, timestamp, now)


def file_update(fh, value, timestamp, now=None):
  if LOCK:
    fcntl.flock(fh.fileno(), fcntl.LOCK_EX)

  header = __readHeader(fh)
  if now is None:
    now = int(time.time())
  if timestamp is None:
    timestamp = now

  timestamp = int(timestamp)
  diff = now - timestamp
  if not ((diff < header['maxRetention']) and diff >= 0):
    raise TimestampNotCovered(
      "Timestamp not covered by any archives in this database.")

  # Find the highest-precision archive that covers timestamp
  for i, archive in enumerate(header['archives']):
    if archive['retention'] < diff:
      continue
    # We'll pass on the update to these lower precision archives later
    lowerArchives = header['archives'][i + 1:]
    break

  # First we update the highest-precision archive
  myInterval = timestamp - (timestamp % archive['secondsPerPoint'])
  myPackedPoint = struct.pack(pointFormat, myInterval, value)
  fh.seek(archive['offset'])
  packedPoint = fh.read(pointSize)
  try:
    (baseInterval, baseValue) = struct.unpack(pointFormat, packedPoint)
  except struct.error:
    raise CorruptWhisperFile("Unable to read base datapoint", fh.name)

  if baseInterval == 0:  # This file's first update
    fh.seek(archive['offset'])
    fh.write(myPackedPoint)
    baseInterval = myInterval
  else:  # Not our first update
    timeDistance = myInterval - baseInterval
    pointDistance = timeDistance // archive['secondsPerPoint']
    byteDistance = pointDistance * pointSize
    myOffset = archive['offset'] + (byteDistance % archive['size'])
    fh.seek(myOffset)
    fh.write(myPackedPoint)

  # Now we propagate the update to lower-precision archives
  higher = archive
  for lower in lowerArchives:
    if not __propagate(fh, header, myInterval, higher, lower):
      break
    higher = lower

  if AUTOFLUSH:
    fh.flush()
    os.fsync(fh.fileno())


def update_many(path, points, now=None):
  """update_many(path,points)

  path is a string
  points is a list of (timestamp,value) points
  """
  if not points:
    return
  points = [(int(t), float(v)) for (t, v) in points]
  points.sort(key=lambda p: p[0], reverse=True)  # Order points by timestamp, newest first
  with open(path, 'r+b', BUFFERING) as fh:
    if CAN_FADVISE and FADVISE_RANDOM:
      posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
    return file_update_many(fh, points, now)


def file_update_many(fh, points, now=None):
  if LOCK:
    fcntl.flock(fh.fileno(), fcntl.LOCK_EX)

  header = __readHeader(fh)
  if now is None:
    now = int(time.time())
  archives = iter(header['archives'])
  currentArchive = next(archives)
  currentPoints = []

  for point in points:
    age = now - point[0]

    while currentArchive['retention'] < age:  # We can't fit any more points in this archive
      if currentPoints:  # Commit all the points we've found that it can fit
        currentPoints.reverse()  # Put points in chronological order
        __archive_update_many(fh, header, currentArchive, currentPoints)
        currentPoints = []
      try:
        currentArchive = next(archives)
      except StopIteration:
        currentArchive = None
        break

    if not currentArchive:
      break  # Drop remaining points that don't fit in the database

    currentPoints.append(point)

  # Don't forget to commit after we've checked all the archives
  if currentArchive and currentPoints:
    currentPoints.reverse()
    __archive_update_many(fh, header, currentArchive, currentPoints)

  if AUTOFLUSH:
    fh.flush()
    os.fsync(fh.fileno())


def __archive_update_many(fh, header, archive, points):
  step = archive['secondsPerPoint']
  alignedPoints = [(timestamp - (timestamp % step), value)
                   for (timestamp, value) in points]
  # Create a packed string for each contiguous sequence of points
  packedStrings = []
  previousInterval = None
  currentString = b""
  lenAlignedPoints = len(alignedPoints)
  for i in xrange(0, lenAlignedPoints):
    # Take last point in run of points with duplicate intervals
    if i + 1 < lenAlignedPoints and alignedPoints[i][0] == alignedPoints[i + 1][0]:
      continue
    (interval, value) = alignedPoints[i]
    if (not previousInterval) or (interval == previousInterval + step):
      currentString += struct.pack(pointFormat, interval, value)
      previousInterval = interval
    else:
      numberOfPoints = len(currentString) // pointSize
      startInterval = previousInterval - (step * (numberOfPoints - 1))
      packedStrings.append((startInterval, currentString))
      currentString = struct.pack(pointFormat, interval, value)
      previousInterval = interval
  if currentString:
    numberOfPoints = len(currentString) // pointSize
    startInterval = previousInterval - (step * (numberOfPoints - 1))
    packedStrings.append((startInterval, currentString))

  # Read base point and determine where our writes will start
  fh.seek(archive['offset'])
  packedBasePoint = fh.read(pointSize)
  try:
    (baseInterval, baseValue) = struct.unpack(pointFormat, packedBasePoint)
  except struct.error:
    raise CorruptWhisperFile("Unable to read base datapoint", fh.name)
  if baseInterval == 0:  # This file's first update
    # Use our first string as the base, so we start at the start
    baseInterval = packedStrings[0][0]

  # Write all of our packed strings in locations determined by the baseInterval
  for (interval, packedString) in packedStrings:
    timeDistance = interval - baseInterval
    pointDistance = timeDistance // step
    byteDistance = pointDistance * pointSize
    myOffset = archive['offset'] + (byteDistance % archive['size'])
    fh.seek(myOffset)
    archiveEnd = archive['offset'] + archive['size']
    bytesBeyond = (myOffset + len(packedString)) - archiveEnd

    if bytesBeyond > 0:
      fh.write(packedString[:-bytesBeyond])
      assert fh.tell() == archiveEnd, (
        "archiveEnd=%d fh.tell=%d bytesBeyond=%d len(packedString)=%d" %
        (archiveEnd, fh.tell(), bytesBeyond, len(packedString))
      )
      fh.seek(archive['offset'])
      # Safe because it can't exceed the archive (retention checking logic above)
      fh.write(packedString[-bytesBeyond:])
    else:
      fh.write(packedString)

  # Now we propagate the updates to lower-precision archives
  higher = archive
  lowerArchives = [arc for arc in header['archives']
                   if arc['secondsPerPoint'] > archive['secondsPerPoint']]

  for lower in lowerArchives:
    def fit(i):
      return i - (i % lower['secondsPerPoint'])
    lowerIntervals = [fit(p[0]) for p in alignedPoints]
    uniqueLowerIntervals = set(lowerIntervals)
    propagateFurther = False
    for interval in uniqueLowerIntervals:
      if __propagate(fh, header, interval, higher, lower):
        propagateFurther = True

    if not propagateFurther:
      break
    higher = lower


def info(path):
  """
  info(path)

  path is a string
  """
  try:
    with open(path, 'rb') as fh:
      return __readHeader(fh)
  except (IOError, OSError):
    pass
  return None


def fetch(path, fromTime, untilTime=None, now=None, archiveToSelect=None):
  """fetch(path,fromTime,untilTime=None,archiveToSelect=None)

  path is a string
  fromTime is an epoch time
  untilTime is also an epoch time, but defaults to now.
  archiveToSelect is the requested granularity, but defaults to None.

  Returns a tuple of (timeInfo, valueList)
  where timeInfo is itself a tuple of (fromTime, untilTime, step)

  Returns None if no data can be returned
  """
  with open(path, 'rb') as fh:
    return file_fetch(fh, fromTime, untilTime, now, archiveToSelect)


def file_fetch(fh, fromTime, untilTime, now=None, archiveToSelect=None):
  header = __readHeader(fh)
  if now is None:
    now = int(time.time())
  if untilTime is None:
    untilTime = now
  fromTime = int(fromTime)
  untilTime = int(untilTime)

  # Here we try and be flexible and return as much data as we can.
  # If the range of data is from too far in the past or fully in the future, we
  # return nothing
  if fromTime > untilTime:
    raise InvalidTimeInterval(
      "Invalid time interval: from time '%s' is after until time '%s'" %
      (fromTime, untilTime))

  oldestTime = now - header['maxRetention']
  # Range is in the future
  if fromTime > now:
    return None
  # Range is beyond retention
  if untilTime < oldestTime:
    return None
  # Range requested is partially beyond retention, adjust
  if fromTime < oldestTime:
    fromTime = oldestTime
  # Range is partially in the future, adjust
  if untilTime > now:
    untilTime = now

  diff = now - fromTime

  # Parse granularity if requested
  if archiveToSelect:
    retentionStr = str(archiveToSelect) + ":1"
    archiveToSelect = parseRetentionDef(retentionStr)[0]

  for archive in header['archives']:
    if archiveToSelect:
      if archive['secondsPerPoint'] == archiveToSelect:
        break
      archive = None
    else:
      if archive['retention'] >= diff:
        break

  if archiveToSelect and not archive:
    raise ValueError("Invalid granularity: %s" % (archiveToSelect))

  return __archive_fetch(fh, archive, fromTime, untilTime)


def __archive_fetch(fh, archive, fromTime, untilTime):
  """
  Fetch data from a single archive.
  Note that checks for validity of the time period requested happen above this
  level so it's possible to wrap around the archive on a read and request data
  older than the archive's retention
  """
  step = archive['secondsPerPoint']

  fromInterval = int(fromTime - (fromTime % step)) + step

  untilInterval = int(untilTime - (untilTime % step)) + step

  if fromInterval == untilInterval:
    # Zero-length time range: always include the next point
    untilInterval += step

  fh.seek(archive['offset'])
  packedPoint = fh.read(pointSize)
  try:
    (baseInterval, baseValue) = struct.unpack(pointFormat, packedPoint)
  except struct.error:
    raise CorruptWhisperFile("Unable to read base datapoint", fh.name)

  if baseInterval == 0:
    points = (untilInterval - fromInterval) // step
    timeInfo = (fromInterval, untilInterval, step)
    valueList = [None] * points
    return (timeInfo, valueList)

  # Determine fromOffset
  timeDistance = fromInterval - baseInterval
  pointDistance = timeDistance // step
  byteDistance = pointDistance * pointSize
  fromOffset = archive['offset'] + (byteDistance % archive['size'])

  # Determine untilOffset
  timeDistance = untilInterval - baseInterval
  pointDistance = timeDistance // step
  byteDistance = pointDistance * pointSize
  untilOffset = archive['offset'] + (byteDistance % archive['size'])

  # Read all the points in the interval
  fh.seek(fromOffset)
  if fromOffset < untilOffset:  # If we don't wrap around the archive
    seriesString = fh.read(untilOffset - fromOffset)
  else:  # We do wrap around the archive, so we need two reads
    archiveEnd = archive['offset'] + archive['size']
    seriesString = fh.read(archiveEnd - fromOffset)
    fh.seek(archive['offset'])
    seriesString += fh.read(untilOffset - archive['offset'])

  # Now we unpack the series data we just read (anything faster than unpack?)
  byteOrder, pointTypes = pointFormat[0], pointFormat[1:]
  points = len(seriesString) // pointSize
  seriesFormat = byteOrder + (pointTypes * points)
  try:
    unpackedSeries = struct.unpack(seriesFormat, seriesString)
  except struct.error:
    raise CorruptWhisperFile("Unable to read datapoints", fh.name)

  # And finally we construct a list of values (optimize this!)
  valueList = [None] * points  # Pre-allocate entire list for speed
  currentInterval = fromInterval

  for i in xrange(0, len(unpackedSeries), 2):
    pointTime = unpackedSeries[i]
    if pointTime == currentInterval:
      pointValue = unpackedSeries[i + 1]
      valueList[i // 2] = pointValue  # In-place reassignment is faster than append()
    currentInterval += step

  timeInfo = (fromInterval, untilInterval, step)
  return (timeInfo, valueList)


def merge(path_from, path_to, time_from=None, time_to=None, now=None):
  """ Merges the data from one whisper file into another. Each file must have
  the same archive configuration. time_from and time_to can optionally be
  specified for the merge.
  """
  # Python 2.7 will allow the following commented line
  # with open(path_from, 'rb') as fh_from, open(path_to, 'rb+') as fh_to:
  # But with Python 2.6 we need to use this (I prefer not to introduce
  # contextlib.nested just for this):
  with open(path_from, 'rb') as fh_from:
    with open(path_to, 'rb+') as fh_to:
      return file_merge(fh_from, fh_to, time_from, time_to, now)


def file_merge(fh_from, fh_to, time_from=None, time_to=None, now=None):
  headerFrom = __readHeader(fh_from)
  headerTo = __readHeader(fh_to)
  if headerFrom['archives'] != headerTo['archives']:
    raise NotImplementedError(
      "%s and %s archive configurations are unalike. "
      "Resize the input before merging" % (fh_from.name, fh_to.name))

  if now is None:
    now = int(time.time())

  if (time_to is not None):
    untilTime = time_to
  else:
    untilTime = now

  if (time_from is not None):
    fromTime = time_from
  else:
    fromTime = 0

  # Sanity check: do not mix the from/to values.
  if untilTime < fromTime:
    raise ValueError("time_to must be >= time_from")

  archives = headerFrom['archives']
  archives.sort(key=operator.itemgetter('retention'))

  for archive in archives:
    archiveFrom = fromTime
    archiveTo = untilTime
    if archiveFrom < now - archive['retention']:
      archiveFrom = now - archive['retention']
    # if untilTime is too old, skip this archive
    if archiveTo < now - archive['retention']:
      continue
    (timeInfo, values) = __archive_fetch(fh_from, archive, archiveFrom, archiveTo)

    (start, end, archive_step) = timeInfo

    pointsToWrite = list(ifilter(
      lambda points: points[1] is not None,
      izip(xrange(start, end, archive_step), values)))
    # skip if there are no points to write
    if len(pointsToWrite) == 0:
      continue
    __archive_update_many(fh_to, headerTo, archive, pointsToWrite)


def diff(path_from, path_to, ignore_empty=False, until_time=None, now=None):
  """ Compare two whisper databases. Each file must have the same archive configuration """
  with open(path_from, 'rb') as fh_from:
    with open(path_to, 'rb') as fh_to:
      return file_diff(fh_from, fh_to, ignore_empty, until_time, now)


def file_diff(fh_from, fh_to, ignore_empty=False, until_time=None, now=None):
  headerFrom = __readHeader(fh_from)
  headerTo = __readHeader(fh_to)
  if headerFrom['archives'] != headerTo['archives']:
    # TODO: Add specific whisper-resize commands to right size things
    raise NotImplementedError(
      "%s and %s archive configurations are unalike. "
      "Resize the input before diffing" % (fh_from.name, fh_to.name))

  archives = headerFrom['archives']
  archives.sort(key=operator.itemgetter('retention'))

  archive_diffs = []

  if now is None:
    now = int(time.time())
  if until_time:
    untilTime = until_time
  else:
    untilTime = now

  for archive_number, archive in enumerate(archives):
    diffs = []
    startTime = now - archive['retention']
    (fromTimeInfo, fromValues) = \
        __archive_fetch(fh_from, archive, startTime, untilTime)
    (toTimeInfo, toValues) = __archive_fetch(fh_to, archive, startTime, untilTime)
    (start, end, archive_step) = \
        (min(fromTimeInfo[0], toTimeInfo[0]),
         max(fromTimeInfo[1], toTimeInfo[1]),
         min(fromTimeInfo[2], toTimeInfo[2]))

    points = map(lambda s: (s * archive_step + start, fromValues[s], toValues[s]),
                 xrange(0, (end - start) // archive_step))
    if ignore_empty:
      points = [p for p in points if p[1] is not None and p[2] is not None]
    else:
      points = [p for p in points if p[1] is not None or p[2] is not None]

    diffs = [p for p in points if p[1] != p[2]]

    archive_diffs.append((archive_number, diffs, points.__len__()))
    untilTime = min(startTime, untilTime)

  return archive_diffs