carbon-1.0.2/0000755000000000000000000000000013131244747012722 5ustar rootroot00000000000000carbon-1.0.2/bin/0000755000000000000000000000000013131244747013472 5ustar rootroot00000000000000carbon-1.0.2/bin/carbon-aggregator.py0000755000000000000000000000210113131244455017421 0ustar rootroot00000000000000#!/usr/bin/env python """Copyright 2009 Chris Davis Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.""" import sys import os.path # Figure out where we're installed BIN_DIR = os.path.dirname(os.path.abspath(__file__)) ROOT_DIR = os.path.dirname(BIN_DIR) # Make sure that carbon's 'lib' dir is in the $PYTHONPATH if we're running from # source. LIB_DIR = os.path.join(ROOT_DIR, "lib") sys.path.insert(0, LIB_DIR) from carbon.util import run_twistd_plugin from carbon.exceptions import CarbonConfigException try: run_twistd_plugin(__file__) except CarbonConfigException, exc: raise SystemExit(str(exc)) carbon-1.0.2/bin/carbon-cache.py0000755000000000000000000000210113131244455016342 0ustar rootroot00000000000000#!/usr/bin/env python """Copyright 2009 Chris Davis Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.""" import sys import os.path # Figure out where we're installed BIN_DIR = os.path.dirname(os.path.abspath(__file__)) ROOT_DIR = os.path.dirname(BIN_DIR) # Make sure that carbon's 'lib' dir is in the $PYTHONPATH if we're running from # source. LIB_DIR = os.path.join(ROOT_DIR, "lib") sys.path.insert(0, LIB_DIR) from carbon.util import run_twistd_plugin from carbon.exceptions import CarbonConfigException try: run_twistd_plugin(__file__) except CarbonConfigException, exc: raise SystemExit(str(exc)) carbon-1.0.2/bin/carbon-client.py0000755000000000000000000001061413131244455016565 0ustar rootroot00000000000000#!/usr/bin/env python """Copyright 2009 Chris Davis Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License.""" import sys import imp from os.path import dirname, join, abspath, exists from optparse import OptionParser # Figure out where we're installed BIN_DIR = dirname(abspath(__file__)) ROOT_DIR = dirname(BIN_DIR) CONF_DIR = join(ROOT_DIR, 'conf') default_relayrules = join(CONF_DIR, 'relay-rules.conf') # Make sure that carbon's 'lib' dir is in the $PYTHONPATH if we're running from # source. LIB_DIR = join(ROOT_DIR, 'lib') sys.path.insert(0, LIB_DIR) try: from twisted.internet import epollreactor epollreactor.install() except ImportError: pass from twisted.internet import stdio, reactor, defer from twisted.protocols.basic import LineReceiver from carbon.routers import ConsistentHashingRouter, RelayRulesRouter from carbon.client import CarbonClientManager from carbon import log, events option_parser = OptionParser(usage="%prog [options] ...") option_parser.add_option('--debug', action='store_true', help="Log debug info to stdout") option_parser.add_option('--keyfunc', help="Use a custom key function (path/to/module.py:myFunc)") option_parser.add_option('--replication', type='int', default=1, help='Replication factor') option_parser.add_option('--routing', default='consistent-hashing', help='Routing method: "consistent-hashing" (default) or "relay"') option_parser.add_option('--diverse-replicas', action='store_true', help="Spread replicas across diff. servers") option_parser.add_option('--relayrules', default=default_relayrules, help='relay-rules.conf file to use for relay routing') options, args = option_parser.parse_args() if not args: print 'At least one host:port destination required\n' option_parser.print_usage() raise SystemExit(1) if options.routing not in ('consistent-hashing', 'relay'): print "Invalid --routing value, must be one of:" print " consistent-hashing" print " relay" raise SystemExit(1) destinations = [] for arg in args: parts = arg.split(':', 2) host = parts[0] port = int(parts[1]) if len(parts) > 2: instance = parts[2] else: instance = None destinations.append( (host, port, instance) ) if options.debug: log.logToStdout() log.setDebugEnabled(True) defer.setDebugging(True) if options.routing == 'consistent-hashing': router = ConsistentHashingRouter(options.replication, diverse_replicas=options.diverse_replicas) elif options.routing == 'relay': if exists(options.relayrules): router = RelayRulesRouter(options.relayrules) else: print "relay rules file %s does not exist" % options.relayrules raise SystemExit(1) client_manager = CarbonClientManager(router) reactor.callWhenRunning(client_manager.startService) if options.keyfunc: router.setKeyFunctionFromModule(options.keyfunc) firstConnectAttempts = [client_manager.startClient(dest) for dest in destinations] firstConnectsAttempted = defer.DeferredList(firstConnectAttempts) class StdinMetricsReader(LineReceiver): delimiter = '\n' def lineReceived(self, line): #log.msg("[DEBUG] lineReceived(): %s" % line) try: (metric, value, timestamp) = line.split() datapoint = (float(timestamp), float(value)) assert datapoint[1] == datapoint[1] # filter out NaNs client_manager.sendDatapoint(metric, datapoint) except ValueError: log.err(None, 'Dropping invalid line: %s' % line) def connectionLost(self, reason): log.msg('stdin disconnected') def startShutdown(results): log.msg("startShutdown(%s)" % str(results)) allStopped = client_manager.stopAllClients() allStopped.addCallback(shutdown) firstConnectsAttempted.addCallback(startShutdown) 
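# Illustrative usage note (hypothetical host/port values): metrics are read
# from stdin in the plaintext "metric value timestamp" format that
# StdinMetricsReader.lineReceived() above parses, and each datapoint is routed
# to one of the host:port[:instance] destinations given on the command line:
#
#   echo "local.random.diceroll 4 `date +%s`" | ./carbon-client.py 127.0.0.1:2004:a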
stdio.StandardIO( StdinMetricsReader() ) exitCode = 0 def shutdown(results): global exitCode for success, result in results: if not success: exitCode = 1 break if reactor.running: reactor.stop() reactor.run() raise SystemExit(exitCode) carbon-1.0.2/bin/validate-storage-schemas.py0000755000000000000000000000430013131244455020714 0ustar rootroot00000000000000#!/usr/bin/env python """Copyright 2009 Chris Davis Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.""" import sys import whisper from os.path import dirname, exists, join, realpath from ConfigParser import ConfigParser if len(sys.argv) == 2: SCHEMAS_FILE = sys.argv[1] print "Loading storage-schemas configuration from: '%s'" % SCHEMAS_FILE else: SCHEMAS_FILE = realpath(join(dirname(__file__), '..', 'conf', 'storage-schemas.conf')) print "Loading storage-schemas configuration from default location at: '%s'" % SCHEMAS_FILE config_parser = ConfigParser() if not config_parser.read(SCHEMAS_FILE): raise SystemExit("Error: Couldn't read config file: %s" % SCHEMAS_FILE) errors_found = 0 for section in config_parser.sections(): print "Section '%s':" % section options = dict(config_parser.items(section)) retentions = options['retentions'].split(',') archives = [] section_failed = False for retention in retentions: try: archives.append(whisper.parseRetentionDef(retention)) except ValueError, e: print " - Error: Section '%s' contains an invalid item in its retention definition ('%s')" % \ (section, retention) print " %s" % e.message section_failed = True if not section_failed: try: whisper.validateArchiveList(archives) except whisper.InvalidConfiguration, e: print " - Error: Section '%s' contains an invalid retention definition ('%s')" % \ (section, ','.join(retentions)) print " %s" % e.message if section_failed: errors_found += 1 else: print " OK" if errors_found: raise SystemExit( "Storage-schemas configuration '%s' failed validation" % SCHEMAS_FILE) print "Storage-schemas configuration '%s' is valid" % SCHEMAS_FILE carbon-1.0.2/bin/carbon-relay.py0000755000000000000000000000210113131244455016413 0ustar rootroot00000000000000#!/usr/bin/env python """Copyright 2009 Chris Davis Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.""" import sys import os.path # Figure out where we're installed BIN_DIR = os.path.dirname(os.path.abspath(__file__)) ROOT_DIR = os.path.dirname(BIN_DIR) # Make sure that carbon's 'lib' dir is in the $PYTHONPATH if we're running from # source. 
LIB_DIR = os.path.join(ROOT_DIR, "lib") sys.path.insert(0, LIB_DIR) from carbon.util import run_twistd_plugin from carbon.exceptions import CarbonConfigException try: run_twistd_plugin(__file__) except CarbonConfigException, exc: raise SystemExit(str(exc)) carbon-1.0.2/conf/0000755000000000000000000000000013131244747013647 5ustar rootroot00000000000000carbon-1.0.2/conf/whitelist.conf.example0000644000000000000000000000047313131244450020157 0ustar rootroot00000000000000# This file takes a single regular expression per line # If USE_WHITELIST is set to True in carbon.conf, only metrics received which # match one of these expressions will be persisted. If this file is empty or # missing, all metrics will pass through. # This file is reloaded automatically when changes are made .* carbon-1.0.2/conf/storage-aggregation.conf.example0000644000000000000000000000147313131244455022102 0ustar rootroot00000000000000# Aggregation methods for whisper files. Entries are scanned in order, # and first match wins. This file is scanned for changes every 60 seconds # # [name] # pattern = <regex> # xFilesFactor = <float between 0 and 1> # aggregationMethod = <average|sum|last|max|min> # # name: Arbitrary unique name for the rule # pattern: Regex pattern to match against the metric name # xFilesFactor: Ratio of valid data points required for aggregation to the next retention to occur # aggregationMethod: function to apply to data points for aggregation # [min] pattern = \.min$ xFilesFactor = 0.1 aggregationMethod = min [max] pattern = \.max$ xFilesFactor = 0.1 aggregationMethod = max [sum] pattern = \.count$ xFilesFactor = 0 aggregationMethod = sum [default_average] pattern = .* xFilesFactor = 0.5 aggregationMethod = average carbon-1.0.2/conf/aggregation-rules.conf.example0000644000000000000000000000340613131244450021561 0ustar rootroot00000000000000# The form of each line in this file should be as follows: # # output_template (frequency) = method input_pattern # # This will capture any received metrics that match 'input_pattern' # for calculating an aggregate metric. The calculation will occur # every 'frequency' seconds and the 'method' can specify 'sum' or # 'avg'. The name of the aggregate metric will be derived from # 'output_template' filling in any captured fields from 'input_pattern'. # # For example, if your metric naming scheme is: # # <env>.applications.<app>.<server>.<metric> # # You could configure some aggregations like so: # # <env>.applications.<app>.all.requests (60) = sum <env>.applications.<app>.*.requests # <env>.applications.<app>.all.latency (60) = avg <env>.applications.<app>.*.latency # # As an example, if the following metrics are received: # # prod.applications.apache.www01.requests # prod.applications.apache.www01.requests # # They would all go into the same aggregation buffer and after 60 seconds the # aggregate metric 'prod.applications.apache.all.requests' would be calculated # by summing their values. # # Template components such as <env> will match everything up to the next dot. # To match multiple metric components, including the dots, use a double-bracketed # component such as <<app_metric>> in the input template: # # <env>.applications.<app>.all.<app_metric> (60) = sum <env>.applications.<app>.*.<<app_metric>> # # It is also possible to use regular expressions. Following the example above # when using: # # <env>.applications.<app>.<domain>.requests (60) = sum <env>.applications.<app>.<domain>\d{2}.requests # # You will end up with 'prod.applications.apache.www.requests' instead of # 'prod.applications.apache.all.requests'. # # Note that any time this file is modified, it will be re-read automatically.
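# As a further commented illustration (hypothetical metric names), the
# <<app_metric>> rule shown above would aggregate the received metrics
#
#   prod.applications.apache.www01.jvm.heap.used
#   prod.applications.apache.www02.jvm.heap.used
#
# into 'prod.applications.apache.all.jvm.heap.used', because <<app_metric>>
# captures 'jvm.heap.used' including the dots.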
carbon-1.0.2/conf/rewrite-rules.conf.example0000644000000000000000000000105613131244450020752 0ustar rootroot00000000000000# This file defines regular expression patterns that can be used to # rewrite metric names in a search & replace fashion. It consists of two # sections, [pre] and [post]. The rules in the pre section are applied to # metric names as soon as they are received. The post rules are applied # after aggregation has taken place. # # The general form of each rule is as follows: # # regex-pattern = replacement-text # # For example: # # [post] # _sum$ = # _avg$ = # # These rules would strip off a suffix of _sum or _avg from any metric names # after aggregation. carbon-1.0.2/conf/storage-schemas.conf.example0000644000000000000000000000153213131244450021225 0ustar rootroot00000000000000# Schema definitions for Whisper files. Entries are scanned in order, # and first match wins. This file is scanned for changes every 60 seconds. # # Definition Syntax: # # [name] # pattern = regex # retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ... # # Remember: To support accurate aggregation from higher to lower resolution # archives, the precision of a longer retention archive must be # cleanly divisible by precision of next lower retention archive. # # Valid: 60s:7d,300s:30d (300/60 = 5) # Invalid: 180s:7d,300s:30d (300/180 = 3.333) # # Carbon's internal metrics. This entry should match what is specified in # CARBON_METRIC_PREFIX and CARBON_METRIC_INTERVAL settings [carbon] pattern = ^carbon\. retentions = 60:90d [default_1min_for_1day] pattern = .* retentions = 60s:1d carbon-1.0.2/conf/relay-rules.conf.example0000644000000000000000000000157013131244450020406 0ustar rootroot00000000000000# Relay destination rules for carbon-relay. Entries are scanned in order, # and the first pattern a metric matches will cause processing to cease after sending # unless `continue` is set to true # # [name] # pattern = # destinations = # continue = # default: False # # name: Arbitrary unique name to identify the rule # pattern: Regex pattern to match against the metric name # destinations: Comma-separated list of destinations. # ex: 127.0.0.1:2004:a, 10.1.2.4:2004, myserver.mydomain.com:2004 # continue: Continue processing rules if this rule matches (default: False) # You must have exactly one section with 'default = true' # Note that all destinations listed must also exist in carbon.conf # in the DESTINATIONS setting in the [relay] section [default] default = true destinations = 127.0.0.1:2004:a, 127.0.0.1:2104:b carbon-1.0.2/conf/carbon.conf.example0000644000000000000000000006270413131244455017421 0ustar rootroot00000000000000[cache] # Configure carbon directories. # # OS environment variables can be used to tell carbon where graphite is # installed, where to read configuration from and where to write data. # # GRAPHITE_ROOT - Root directory of the graphite installation. # Defaults to ../ # GRAPHITE_CONF_DIR - Configuration directory (where this file lives). # Defaults to $GRAPHITE_ROOT/conf/ # GRAPHITE_STORAGE_DIR - Storage directory for whisper/rrd/log/pid files. # Defaults to $GRAPHITE_ROOT/storage/ # # To change other directory paths, add settings to this file. 
The following # configuration variables are available with these default values: # # STORAGE_DIR = $GRAPHITE_STORAGE_DIR # LOCAL_DATA_DIR = %(STORAGE_DIR)s/whisper/ # WHITELISTS_DIR = %(STORAGE_DIR)s/lists/ # CONF_DIR = %(STORAGE_DIR)s/conf/ # LOG_DIR = %(STORAGE_DIR)s/log/ # PID_DIR = %(STORAGE_DIR)s/ # # For FHS style directory structures, use: # # STORAGE_DIR = /var/lib/carbon/ # CONF_DIR = /etc/carbon/ # LOG_DIR = /var/log/carbon/ # PID_DIR = /var/run/ # #LOCAL_DATA_DIR = /opt/graphite/storage/whisper/ # Specify the database library used to store metric data on disk. Each database # may have configurable options to change the behaviour of how it writes to # persistent storage. # # whisper - Fixed-size database, similar in design and purpose to RRD. This is # the default storage backend for carbon and the most rigorously tested. # # ceres - Experimental alternative database that supports storing data in sparse # files of arbitrary fixed-size resolutions. DATABASE = whisper # Enable daily log rotation. If disabled, a new file will be opened whenever the log file path no # longer exists (i.e. it is removed or renamed) ENABLE_LOGROTATION = True # Specify the user to drop privileges to # If this is blank carbon-cache runs as the user that invokes it # This user must have write access to the local data directory USER = # Limit the size of the cache to avoid swapping or becoming CPU bound. # Sorts and serving cache queries gets more expensive as the cache grows. # Use the value "inf" (infinity) for an unlimited cache size. # value should be an integer number of metric datapoints. MAX_CACHE_SIZE = inf # Limits the number of whisper update_many() calls per second, which effectively # means the number of write requests sent to the disk. This is intended to # prevent over-utilizing the disk and thus starving the rest of the system. # When the rate of required updates exceeds this, then carbon's caching will # take effect and increase the overall throughput accordingly. MAX_UPDATES_PER_SECOND = 500 # If defined, this changes the MAX_UPDATES_PER_SECOND in Carbon when a # stop/shutdown is initiated. This helps when MAX_UPDATES_PER_SECOND is # relatively low and carbon has cached a lot of updates; it enables the carbon # daemon to shutdown more quickly. # MAX_UPDATES_PER_SECOND_ON_SHUTDOWN = 1000 # Softly limits the number of whisper files that get created each minute. # Setting this value low (e.g. 50) is a good way to ensure that your carbon # system will not be adversely impacted when a bunch of new metrics are # sent to it. The trade off is that any metrics received in excess of this # value will be silently dropped, and the whisper file will not be created # until such point as a subsequent metric is received and fits within the # defined rate limit. Setting this value high (like "inf" for infinity) will # cause carbon to create the files quickly but at the risk of increased I/O. MAX_CREATES_PER_MINUTE = 50 # Set the minimum timestamp resolution supported by this instance. This allows # internal optimisations by overwriting points with equal truncated timestamps # in order to limit the number of updates to the database. It defaults to one # second. MIN_TIMESTAMP_RESOLUTION = 1 # Set the interface and port for the line (plain text) listener. Setting the # interface to 0.0.0.0 listens on all interfaces. Port can be set to 0 to # disable this listener if it is not required. LINE_RECEIVER_INTERFACE = 0.0.0.0 LINE_RECEIVER_PORT = 2003 # Set this to True to enable the UDP listener. 
By default this is off # because it is very common to run multiple carbon daemons and managing # another (rarely used) port for every carbon instance is not fun. ENABLE_UDP_LISTENER = False UDP_RECEIVER_INTERFACE = 0.0.0.0 UDP_RECEIVER_PORT = 2003 # Set the interface and port for the pickle listener. Setting the interface to # 0.0.0.0 listens on all interfaces. Port can be set to 0 to disable this # listener if it is not required. PICKLE_RECEIVER_INTERFACE = 0.0.0.0 PICKLE_RECEIVER_PORT = 2004 # Set the interface and port for the protobuf listener. Setting the interface to # 0.0.0.0 listens on all interfaces. Port can be set to 0 to disable this # listener if it is not required. # PROTOBUF_RECEIVER_INTERFACE = 0.0.0.0 # PROTOBUF_RECEIVER_PORT = 2005 # Limit the number of open connections the receiver can handle at any time. # Default is no limit. Setting up a limit for sites handling high volume # traffic may be recommended to avoid running out of TCP memory or having # thousands of TCP connections reduce the throughput of the service. #MAX_RECEIVER_CONNECTIONS = inf # Per security concerns outlined in Bug #817247 the pickle receiver # will use a more secure and slightly less efficient unpickler. # Set this to True to revert to the old-fashioned insecure unpickler. USE_INSECURE_UNPICKLER = False CACHE_QUERY_INTERFACE = 0.0.0.0 CACHE_QUERY_PORT = 7002 # Set this to False to drop datapoints received after the cache # reaches MAX_CACHE_SIZE. If this is True (the default) then sockets # over which metrics are received will temporarily stop accepting # data until the cache size falls below 95% MAX_CACHE_SIZE. USE_FLOW_CONTROL = True # If enabled this setting is used to timeout metric client connection if no # metrics have been sent in specified time in seconds #METRIC_CLIENT_IDLE_TIMEOUT = None # By default, carbon-cache will log every whisper update and cache hit. # This can be excessive and degrade performance if logging on the same # volume as the whisper data is stored. LOG_UPDATES = False LOG_CREATES = False LOG_CACHE_HITS = False LOG_CACHE_QUEUE_SORTS = False # The thread that writes metrics to disk can use one of the following strategies # determining the order in which metrics are removed from cache and flushed to # disk. The default option preserves the same behavior as has been historically # available in version 0.9.10. # # sorted - All metrics in the cache will be counted and an ordered list of # them will be sorted according to the number of datapoints in the cache at the # moment of the list's creation. Metrics will then be flushed from the cache to # disk in that order. # # timesorted - All metrics in the list will be looked at and sorted according # to the timestamp of their datapoints. The metrics that were least recently # written will be written first. This is a hybrid strategy between max and # sorted which is particularly well suited to sets of metrics with non-uniform # resolutions. # # max - The writer thread will always pop and flush the metric from cache # that has the most datapoints. This will give a strong flush preference to # frequently updated metrics and will also reduce random file-io. Infrequently # updated metrics may only ever be persisted to disk at daemon shutdown if # there are a large number of metrics which receive very frequent updates OR if # disk i/o is very slow. # # naive - Metrics will be flushed from the cache to disk in an unordered # fashion.
This strategy may be desirable in situations where the storage for # whisper files is solid state, CPU resources are very limited or deference to # the OS's i/o scheduler is expected to compensate for the random write # pattern. # CACHE_WRITE_STRATEGY = sorted # On some systems it is desirable for whisper to write synchronously. # Set this option to True if you'd like to try this. Basically it will # shift the onus of buffering writes from the kernel into carbon's cache. WHISPER_AUTOFLUSH = False # By default new Whisper files are created pre-allocated with the data region # filled with zeros to prevent fragmentation and speed up contiguous reads and # writes (which are common). Enabling this option will cause Whisper to create # the file sparsely instead. Enabling this option may allow a large increase of # MAX_CREATES_PER_MINUTE but may have longer term performance implications # depending on the underlying storage configuration. # WHISPER_SPARSE_CREATE = False # Only beneficial on linux filesystems that support the fallocate system call. # It maintains the benefits of contiguous reads/writes, but with a potentially # much faster creation speed, by allowing the kernel to handle the block # allocation and zero-ing. Enabling this option may allow a large increase of # MAX_CREATES_PER_MINUTE. If enabled on an OS or filesystem that is unsupported # this option will gracefully fallback to standard POSIX file access methods. WHISPER_FALLOCATE_CREATE = True # Enabling this option will cause Whisper to lock each Whisper file it writes # to with an exclusive lock (LOCK_EX, see: man 2 flock). This is useful when # multiple carbon-cache daemons are writing to the same files. # WHISPER_LOCK_WRITES = False # On systems which has a large number of metrics, an amount of Whisper write(2)'s # pageback sometimes cause disk thrashing due to memory shortage, so that abnormal # disk reads occur. Enabling this option makes it possible to decrease useless # page cache memory by posix_fadvise(2) with POSIX_FADVISE_RANDOM option. # WHISPER_FADVISE_RANDOM = False # By default all nodes stored in Ceres are cached in memory to improve the # throughput of reads and writes to underlying slices. Turning this off will # greatly reduce memory consumption for databases with millions of metrics, at # the cost of a steep increase in disk i/o, approximately an extra two os.stat # calls for every read and write. Reasons to do this are if the underlying # storage can handle stat() with practically zero cost (SSD, NVMe, zRAM). # Valid values are: # all - all nodes are cached # none - node caching is disabled # CERES_NODE_CACHING_BEHAVIOR = all # Ceres nodes can have many slices and caching the right ones can improve # performance dramatically. Note that there are many trade-offs to tinkering # with this, and unless you are a ceres developer you *really* should not # mess with this. Valid values are: # latest - only the most recent slice is cached # all - all slices are cached # none - slice caching is disabled # CERES_SLICE_CACHING_BEHAVIOR = latest # If a Ceres node accumulates too many slices, performance can suffer. # This can be caused by intermittently reported data. To mitigate # slice fragmentation there is a tolerance for how much space can be # wasted within a slice file to avoid creating a new one. That tolerance # level is determined by MAX_SLICE_GAP, which is the number of consecutive # null datapoints allowed in a slice file. 
# If you set this very low, you will waste less of the *tiny* bit disk space # that this feature wastes, and you will be prone to performance problems # caused by slice fragmentation, which can be pretty severe. # If you set this really high, you will waste a bit more disk space (each # null datapoint wastes 8 bytes, but keep in mind your filesystem's block # size). If you suffer slice fragmentation issues, you should increase this or # run the ceres-maintenance defrag plugin more often. However you should not # set it to be huge because then if a large but allowed gap occurs it has to # get filled in, which means instead of a simple 8-byte write to a new file we # could end up doing an (8 * MAX_SLICE_GAP)-byte write to the latest slice. # CERES_MAX_SLICE_GAP = 80 # Enabling this option will cause Ceres to lock each Ceres file it writes to # to with an exclusive lock (LOCK_EX, see: man 2 flock). This is useful when # multiple carbon-cache daemons are writing to the same files. # CERES_LOCK_WRITES = False # Set this to True to enable whitelisting and blacklisting of metrics in # CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf. If the whitelist is # missing or empty, all metrics will pass through # USE_WHITELIST = False # By default, carbon itself will log statistics (such as a count, # metricsReceived) with the top level prefix of 'carbon' at an interval of 60 # seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation # CARBON_METRIC_PREFIX = carbon # CARBON_METRIC_INTERVAL = 60 # Enable AMQP if you want to receve metrics using an amqp broker # ENABLE_AMQP = False # Verbose means a line will be logged for every metric received # useful for testing # AMQP_VERBOSE = False # AMQP_HOST = localhost # AMQP_PORT = 5672 # AMQP_VHOST = / # AMQP_USER = guest # AMQP_PASSWORD = guest # AMQP_EXCHANGE = graphite # AMQP_METRIC_NAME_IN_BODY = False # The manhole interface allows you to SSH into the carbon daemon # and get a python interpreter. BE CAREFUL WITH THIS! If you do # something like time.sleep() in the interpreter, the whole process # will sleep! This is *extremely* helpful in debugging, assuming # you are familiar with the code. If you are not, please don't # mess with this, you are asking for trouble :) # # ENABLE_MANHOLE = False # MANHOLE_INTERFACE = 127.0.0.1 # MANHOLE_PORT = 7222 # MANHOLE_USER = admin # MANHOLE_PUBLIC_KEY = ssh-rsa AAAAB3NzaC1yc2EAAAABiwAaAIEAoxN0sv/e4eZCPpi3N3KYvyzRaBaMeS2RsOQ/cDuKv11dlNzVeiyc3RFmCv5Rjwn/lQ79y0zyHxw67qLyhQ/kDzINc4cY41ivuQXm2tPmgvexdrBv5nsfEpjs3gLZfJnyvlcVyWK/lId8WUvEWSWHTzsbtmXAF2raJMdgLTbQ8wE= # Patterns for all of the metrics this machine will store. Read more at # http://en.wikipedia.org/wiki/Advanced_Message_Queuing_Protocol#Bindings # # Example: store all sales, linux servers, and utilization metrics # BIND_PATTERNS = sales.#, servers.linux.#, #.utilization # # Example: store everything # BIND_PATTERNS = # # To configure special settings for the carbon-cache instance 'b', uncomment this: #[cache:b] #LINE_RECEIVER_PORT = 2103 #PICKLE_RECEIVER_PORT = 2104 #CACHE_QUERY_PORT = 7102 # and any other settings you want to customize, defaults are inherited # from the [cache] section. 
# You can then specify the --instance=b option to manage this instance # # In order to turn off logging of successful connections for the line # receiver, set this to False # LOG_LISTENER_CONN_SUCCESS = True [relay] LINE_RECEIVER_INTERFACE = 0.0.0.0 LINE_RECEIVER_PORT = 2013 PICKLE_RECEIVER_INTERFACE = 0.0.0.0 PICKLE_RECEIVER_PORT = 2014 # Carbon-relay has several options for metric routing controlled by RELAY_METHOD # # Use relay-rules.conf to route metrics to destinations based on pattern rules #RELAY_METHOD = rules # # Use consistent-hashing for even distribution of metrics between destinations #RELAY_METHOD = consistent-hashing # # Use consistent-hashing but take into account an aggregation-rules.conf shared # by downstream carbon-aggregator daemons. This will ensure that all metrics # that map to a given aggregation rule are sent to the same carbon-aggregator # instance. # Enable this for carbon-relays that send to a group of carbon-aggregators #RELAY_METHOD = aggregated-consistent-hashing # # You can also use fast-hashing and fast-aggregated-hashing which are in O(1) # and will always redirect the metrics to the same destination but do not try # to minimize rebalancing when the list of destinations is changing. RELAY_METHOD = rules # If you use consistent-hashing you can add redundancy by replicating every # datapoint to more than one machine. REPLICATION_FACTOR = 1 # For REPLICATION_FACTOR >=2, set DIVERSE_REPLICAS to True to guarantee replicas # across distributed hosts. With this setting disabled, it's possible that replicas # may be sent to different caches on the same host. This has been the default # behavior since introduction of 'consistent-hashing' relay method. # Note that enabling this on an existing pre-0.9.14 cluster will require rebalancing # your metrics across the cluster nodes using a tool like Carbonate. #DIVERSE_REPLICAS = True # This is a list of carbon daemons we will send any relayed or # generated metrics to. The default provided would send to a single # carbon-cache instance on the default port. However if you # use multiple carbon-cache instances then it would look like this: # # DESTINATIONS = 127.0.0.1:2004:a, 127.0.0.1:2104:b # # The general form is IP:PORT:INSTANCE where the :INSTANCE part is # optional and refers to the "None" instance if omitted. # # Note that if the destinations are all carbon-caches then this should # exactly match the webapp's CARBONLINK_HOSTS setting in terms of # instances listed (order matters!). # # If using RELAY_METHOD = rules, all destinations used in relay-rules.conf # must be defined in this list DESTINATIONS = 127.0.0.1:2004 # This define the protocol to use to contact the destination. It can be # set to one of "line", "pickle", "udp" and "protobuf". This list can be # extended with CarbonClientFactory plugins and defaults to "pickle". # DESTINATION_PROTOCOL = pickle # This is the maximum number of datapoints that can be queued up # for a single destination. Once this limit is hit, we will # stop accepting new data if USE_FLOW_CONTROL is True, otherwise # we will drop any subsequently received datapoints. MAX_QUEUE_SIZE = 10000 # This defines the maximum "message size" between carbon daemons. If # your queue is large, setting this to a lower number will cause the # relay to forward smaller discrete chunks of stats, which may prevent # overloading on the receiving side after a disconnect. MAX_DATAPOINTS_PER_MESSAGE = 500 # Limit the number of open connections the receiver can handle as any time. # Default is no limit. 
Setting up a limit for sites handling high volume # traffic may be recommended to avoid running out of TCP memory or having # thousands of TCP connections reduce the throughput of the service. #MAX_RECEIVER_CONNECTIONS = inf # Specify the user to drop privileges to # If this is blank carbon-relay runs as the user that invokes it # USER = # This is the percentage that the queue must be empty before it will accept # more messages. For a larger site, if the queue is very large it makes sense # to tune this to allow for incoming stats. So if you have an average # flow of 100k stats/minute, and a MAX_QUEUE_SIZE of 3,000,000, it makes sense # to allow stats to start flowing when you've cleared the queue to 95% since # you should have space to accommodate the next minute's worth of stats # even before the relay incrementally clears more of the queue QUEUE_LOW_WATERMARK_PCT = 0.8 # To allow for batch efficiency from the pickle protocol and to benefit from # other batching advantages, all writes are deferred by putting them into a queue, # and then the queue is flushed and sent a small fraction of a second later. TIME_TO_DEFER_SENDING = 0.0001 # Set this to False to drop datapoints when any send queue (sending datapoints # to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the # default) then sockets over which metrics are received will temporarily stop accepting # data until the send queues fall below QUEUE_LOW_WATERMARK_PCT * MAX_QUEUE_SIZE. USE_FLOW_CONTROL = True # If enabled this setting is used to timeout metric client connection if no # metrics have been sent in specified time in seconds #METRIC_CLIENT_IDLE_TIMEOUT = None # Set this to True to enable whitelisting and blacklisting of metrics in # CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf. If the whitelist is # missing or empty, all metrics will pass through # USE_WHITELIST = False # By default, carbon itself will log statistics (such as a count, # metricsReceived) with the top level prefix of 'carbon' at an interval of 60 # seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation # CARBON_METRIC_PREFIX = carbon # CARBON_METRIC_INTERVAL = 60 # # In order to turn off logging of successful connections for the line # receiver, set this to False # LOG_LISTENER_CONN_SUCCESS = True # If you're connecting from the relay to a destination that's over the # internet or similarly iffy connection, a backlog can develop because # of internet weather conditions, e.g. acks getting lost or similar issues. # To deal with that, you can enable USE_RATIO_RESET which will let you # re-set the connection to an individual destination. Defaults to being off. USE_RATIO_RESET=False # When there is a small number of stats flowing, it's not desirable to # perform any actions based on percentages - it's just too "twitchy". MIN_RESET_STAT_FLOW=1000 # When the ratio of stats being sent in a reporting interval is far # enough from 1.0, we will disconnect the socket and reconnecto to # clear out queued stats. The default ratio of 0.9 indicates that 10% # of stats aren't being delivered within one CARBON_METRIC_INTERVAL # (default of 60 seconds), which can lead to a queue backup. Under # some circumstances re-setting the connection can fix this, so # set this according to your tolerance, and look in the logs for # "resetConnectionForQualityReasons" to observe whether this is kicking # in when your sent queue is building up. MIN_RESET_RATIO=0.9 # The minimum time between resets. 
When a connection is re-set, we # need to wait before another reset is performed. # (2*CARBON_METRIC_INTERVAL) + 1 second is the minimum time needed # before stats for the new connection will be available. Setting this # below (2*CARBON_METRIC_INTERVAL) + 1 second will result in a lot of # reset connections for no good reason. MIN_RESET_INTERVAL=121 [aggregator] LINE_RECEIVER_INTERFACE = 0.0.0.0 LINE_RECEIVER_PORT = 2023 PICKLE_RECEIVER_INTERFACE = 0.0.0.0 PICKLE_RECEIVER_PORT = 2024 # If set to true, metrics received will be forwarded to DESTINATIONS in addition to # the output of the aggregation rules. If set to false, the carbon-aggregator will # only ever send the output of aggregation. FORWARD_ALL = True # Filenames of the configuration files to use for this instance of aggregator. # Filenames are relative to CONF_DIR. # # AGGREGATION_RULES = aggregation-rules.conf # REWRITE_RULES = rewrite-rules.conf # This is a list of carbon daemons we will send any relayed or # generated metrics to. The default provided would send to a single # carbon-cache instance on the default port. However if you # use multiple carbon-cache instances then it would look like this: # # DESTINATIONS = 127.0.0.1:2004:a, 127.0.0.1:2104:b # # The format is comma-delimited IP:PORT:INSTANCE where the :INSTANCE part is # optional and refers to the "None" instance if omitted. # # Note that if the destinations are all carbon-caches then this should # exactly match the webapp's CARBONLINK_HOSTS setting in terms of # instances listed (order matters!). DESTINATIONS = 127.0.0.1:2004 # If you want to add redundancy to your data by replicating every # datapoint to more than one machine, increase this. REPLICATION_FACTOR = 1 # This is the maximum number of datapoints that can be queued up # for a single destination. Once this limit is hit, we will # stop accepting new data if USE_FLOW_CONTROL is True, otherwise # we will drop any subsequently received datapoints. MAX_QUEUE_SIZE = 10000 # Set this to False to drop datapoints when any send queue (sending datapoints # to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the # default) then sockets over which metrics are received will temporarily stop accepting # data until the send queues fall below 80% MAX_QUEUE_SIZE. USE_FLOW_CONTROL = True # If enabled this setting is used to timeout metric client connection if no # metrics have been sent in specified time in seconds #METRIC_CLIENT_IDLE_TIMEOUT = None # This defines the maximum "message size" between carbon daemons. # You shouldn't need to tune this unless you really know what you're doing. MAX_DATAPOINTS_PER_MESSAGE = 500 # This defines how many datapoints the aggregator remembers for # each metric. Aggregation only happens for datapoints that fall in # the past MAX_AGGREGATION_INTERVALS * intervalSize seconds. MAX_AGGREGATION_INTERVALS = 5 # Limit the number of open connections the receiver can handle at any time. # Default is no limit. Setting up a limit for sites handling high volume # traffic may be recommended to avoid running out of TCP memory or having # thousands of TCP connections reduce the throughput of the service. #MAX_RECEIVER_CONNECTIONS = inf # By default (WRITE_BACK_FREQUENCY = 0), carbon-aggregator will write back # aggregated data points once every rule.frequency seconds, on a per-rule basis. # Set this (WRITE_BACK_FREQUENCY = N) to write back all aggregated data points # every N seconds, independent of rule frequency.
This is useful, for example, # to be able to query partially aggregated metrics from carbon-cache without # having to first wait rule.frequency seconds. # WRITE_BACK_FREQUENCY = 0 # Set this to True to enable whitelisting and blacklisting of metrics in # CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf. If the whitelist is # missing or empty, all metrics will pass through # USE_WHITELIST = False # By default, carbon itself will log statistics (such as a count, # metricsReceived) with the top level prefix of 'carbon' at an interval of 60 # seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation # CARBON_METRIC_PREFIX = carbon # CARBON_METRIC_INTERVAL = 60 # In order to turn off logging of successful connections for the line # receiver, set this to False # LOG_LISTENER_CONN_SUCCESS = True # In order to turn off logging of metrics with no corresponding # aggregation rules receiver, set this to False # LOG_AGGREGATOR_MISSES = False # Specify the user to drop privileges to # If this is blank carbon-aggregator runs as the user that invokes it # USER = carbon-1.0.2/conf/blacklist.conf.example0000644000000000000000000000075413131244450020115 0ustar rootroot00000000000000# This file takes a single regular expression per line # If USE_WHITELIST is set to True in carbon.conf, any metrics received which # match one of these expressions will be dropped # This file is reloaded automatically when changes are made ^some\.noisy\.metric\.prefix\..* # Reject metrics with multiple or surrounding dots, since they lead to # counter intuitive behavior when read (they can be read from disk but not # from carbon-cache, at least with whisper data back-end) \.\. ^\. \.$ carbon-1.0.2/conf/carbon.amqp.conf.example0000644000000000000000000000504413131244450020343 0ustar rootroot00000000000000# This is a configuration file with AMQP enabled [cache] LOCAL_DATA_DIR = # Specify the user to drop privileges to # If this is blank carbon runs as the user that invokes it # This user must have write access to the local data directory USER = # Limit the size of the cache to avoid swapping or becoming CPU bound. # Sorts and serving cache queries gets more expensive as the cache grows. # Use the value "inf" (infinity) for an unlimited cache size. MAX_CACHE_SIZE = inf # Limits the number of whisper update_many() calls per second, which effectively # means the number of write requests sent to the disk. This is intended to # prevent over-utilizing the disk and thus starving the rest of the system. # When the rate of required updates exceeds this, then carbon's caching will # take effect and increase the overall throughput accordingly. MAX_UPDATES_PER_SECOND = 1000 # Softly limits the number of whisper files that get created each minute. # Setting this value low (like at 50) is a good way to ensure your graphite # system will not be adversely impacted when a bunch of new metrics are # sent to it. The trade off is that it will take much longer for those metrics' # database files to all get created and thus longer until the data becomes usable. # Setting this value high (like "inf" for infinity) will cause graphite to create # the files quickly but at the risk of slowing I/O down considerably for a while. 
MAX_CREATES_PER_MINUTE = inf LINE_RECEIVER_INTERFACE = 0.0.0.0 LINE_RECEIVER_PORT = 2003 UDP_RECEIVER_INTERFACE = 0.0.0.0 UDP_RECEIVER_PORT = 2003 PICKLE_RECEIVER_INTERFACE = 0.0.0.0 PICKLE_RECEIVER_PORT = 2004 CACHE_QUERY_INTERFACE = 0.0.0.0 CACHE_QUERY_PORT = 7002 # Enable AMQP if you want to receive metrics using your amqp broker ENABLE_AMQP = True # Verbose means a line will be logged for every metric received # useful for testing AMQP_VERBOSE = True # your credentials for the amqp server # AMQP_USER = guest # AMQP_PASSWORD = guest # the network settings for the amqp server # AMQP_HOST = localhost # AMQP_PORT = 5672 # if you want to include the metric name as part of the message body # instead of as the routing key, set this to True # AMQP_METRIC_NAME_IN_BODY = False # NOTE: you cannot run both a cache and a relay on the same server # with the default configuration; you have to specify distinct # interfaces and ports for the listeners. [relay] LINE_RECEIVER_INTERFACE = 0.0.0.0 LINE_RECEIVER_PORT = 2003 PICKLE_RECEIVER_INTERFACE = 0.0.0.0 PICKLE_RECEIVER_PORT = 2004 CACHE_SERVERS = server1, server2, server3 MAX_QUEUE_SIZE = 10000 carbon-1.0.2/PKG-INFO0000644000000000000000000000124013131244747014014 0ustar rootroot00000000000000Metadata-Version: 1.1 Name: carbon Version: 1.0.2 Summary: Backend data caching and persistence daemon for Graphite Home-page: http://graphiteapp.org/ Author: Chris Davis Author-email: chrismd@gmail.com License: Apache Software License 2.0 Description: Backend data caching and persistence daemon for Graphite Platform: UNKNOWN Classifier: Intended Audience :: Developers Classifier: Natural Language :: English Classifier: License :: OSI Approved :: Apache Software License Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 2 :: Only carbon-1.0.2/setup.py0000644000000000000000000000631213131244455014432 0ustar rootroot00000000000000#!/usr/bin/env python from __future__ import with_statement import os import ConfigParser import platform from glob import glob try: from io import BytesIO except ImportError: from StringIO import StringIO as BytesIO # Graphite historically has an install prefix set in setup.cfg. Being in a # configuration file, it's not easy to override it or unset it (for installing # graphite in a virtualenv for instance). # The prefix is now set by ``setup.py`` and *unset* if an environment variable # named ``GRAPHITE_NO_PREFIX`` is present. # While ``setup.cfg`` doesn't contain the prefix anymore, the *unset* step is # required for installations from a source tarball because running # ``python setup.py sdist`` will re-add the prefix to the tarball's # ``setup.cfg``.
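# For example (illustrative invocation), to install without the /opt/graphite
# prefix, e.g. into a virtualenv:
#
#   GRAPHITE_NO_PREFIX=True python setup.py install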
with open('setup.cfg', 'r') as f: orig_setup_cfg = f.read() cf = ConfigParser.ConfigParser() cf.readfp(BytesIO(orig_setup_cfg), 'setup.cfg') if os.environ.get('GRAPHITE_NO_PREFIX'): cf.remove_section('install') else: try: cf.add_section('install') except ConfigParser.DuplicateSectionError: pass if not cf.has_option('install', 'prefix'): cf.set('install', 'prefix', '/opt/graphite') if not cf.has_option('install', 'install-lib'): cf.set('install', 'install-lib', '%(prefix)s/lib') with open('setup.cfg', 'wb') as f: cf.write(f) if os.environ.get('USE_SETUPTOOLS'): from setuptools import setup setup_kwargs = dict(zip_safe=0) else: from distutils.core import setup setup_kwargs = dict() storage_dirs = [ ('storage/ceres', []), ('storage/whisper',[]), ('storage/lists',[]), ('storage/log',[]), ('storage/rrd',[]) ] conf_files = [ ('conf', glob('conf/*.example')) ] install_files = storage_dirs + conf_files # Let's include redhat init scripts, despite build platform # but won't put them in /etc/init.d/ automatically anymore init_scripts = [ ('examples/init.d', ['distro/redhat/init.d/carbon-cache', 'distro/redhat/init.d/carbon-relay', 'distro/redhat/init.d/carbon-aggregator']) ] install_files += init_scripts try: setup( name='carbon', version='1.0.2', url='http://graphiteapp.org/', author='Chris Davis', author_email='chrismd@gmail.com', license='Apache Software License 2.0', description='Backend data caching and persistence daemon for Graphite', long_description='Backend data caching and persistence daemon for Graphite', packages=['carbon', 'carbon.aggregator', 'twisted.plugins'], package_dir={'' : 'lib'}, scripts=glob('bin/*'), package_data={ 'carbon' : ['*.xml'] }, data_files=install_files, install_requires=['Twisted', 'txAMQP'], classifiers=( 'Intended Audience :: Developers', 'Natural Language :: English', 'License :: OSI Approved :: Apache Software License', 'Programming Language :: Python', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 2 :: Only', ), **setup_kwargs ) finally: with open('setup.cfg', 'w') as f: f.write(orig_setup_cfg) carbon-1.0.2/distro/0000755000000000000000000000000013131244747014226 5ustar rootroot00000000000000carbon-1.0.2/distro/redhat/0000755000000000000000000000000013131244747015475 5ustar rootroot00000000000000carbon-1.0.2/distro/redhat/init.d/0000755000000000000000000000000013131244747016662 5ustar rootroot00000000000000carbon-1.0.2/distro/redhat/init.d/carbon-relay0000644000000000000000000000427413131244450021161 0ustar rootroot00000000000000#!/bin/bash # chkconfig: - 25 75 # description: carbon-relay # processname: carbon-relay export PYTHONPATH="$GRAPHITE_DIR/lib:$PYTHONPATH" # Source function library. if [ -e /etc/rc.d/init.d/functions ]; then . /etc/rc.d/init.d/functions; fi; CARBON_DAEMON="relay" GRAPHITE_DIR="/opt/graphite" INSTANCES=`grep "^\[${CARBON_DAEMON}" ${GRAPHITE_DIR}/conf/carbon.conf | cut -d \[ -f 2 | cut -d \] -f 1 | cut -d : -f 2` function die { echo $1 exit 1 } start(){ cd $GRAPHITE_DIR; for INSTANCE in ${INSTANCES}; do if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then INSTANCE="a"; fi; echo "Starting carbon-${CARBON_DAEMON}:${INSTANCE}..." bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} start; if [ $? -eq 0 ]; then echo_success else echo_failure fi; echo "" done; } stop(){ cd $GRAPHITE_DIR for INSTANCE in ${INSTANCES}; do if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then INSTANCE="a"; fi; echo "Stopping carbon-${CARBON_DAEMON}:${INSTANCE}..." 
bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} stop if [ `sleep 3; /usr/bin/pgrep -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}" | /usr/bin/wc -l` -gt 0 ]; then echo "Carbon did not stop yet. Sleeping longer, then force killing it..."; sleep 20; /usr/bin/pkill -9 -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}"; fi; if [ $? -eq 0 ]; then echo_success else echo_failure fi; echo "" done; } status(){ cd $GRAPHITE_DIR; for INSTANCE in ${INSTANCES}; do if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then INSTANCE="a"; fi; bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} status; if [ $? -eq 0 ]; then echo_success else echo_failure fi; echo "" done; } case "$1" in start) start ;; stop) stop ;; status) status ;; restart|reload) stop start ;; *) echo $"Usage: $0 {start|stop|restart|status}" exit 1 esac carbon-1.0.2/distro/redhat/init.d/carbon-aggregator0000644000000000000000000000431313131244450022161 0ustar rootroot00000000000000#!/bin/bash # chkconfig: - 25 75 # description: carbon-aggregator # processname: carbon-aggregator export PYTHONPATH="$GRAPHITE_DIR/lib:$PYTHONPATH" # Source function library. if [ -e /etc/rc.d/init.d/functions ]; then . /etc/rc.d/init.d/functions; fi; CARBON_DAEMON="aggregator" GRAPHITE_DIR="/opt/graphite" INSTANCES=`grep "^\[${CARBON_DAEMON}" ${GRAPHITE_DIR}/conf/carbon.conf | cut -d \[ -f 2 | cut -d \] -f 1 | cut -d : -f 2` function die { echo $1 exit 1 } start(){ cd $GRAPHITE_DIR; for INSTANCE in ${INSTANCES}; do if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then INSTANCE="a"; fi; echo "Starting carbon-${CARBON_DAEMON}:${INSTANCE}..." bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} start; if [ $? -eq 0 ]; then echo_success else echo_failure fi; echo "" done; } stop(){ cd $GRAPHITE_DIR for INSTANCE in ${INSTANCES}; do if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then INSTANCE="a"; fi; echo "Stopping carbon-${CARBON_DAEMON}:${INSTANCE}..." bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} stop if [ `sleep 3; /usr/bin/pgrep -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}" | /usr/bin/wc -l` -gt 0 ]; then echo "Carbon did not stop yet. Sleeping longer, then force killing it..."; sleep 20; /usr/bin/pkill -9 -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}"; fi; if [ $? -eq 0 ]; then echo_success else echo_failure fi; echo "" done; } status(){ cd $GRAPHITE_DIR; for INSTANCE in ${INSTANCES}; do if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then INSTANCE="a"; fi; bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} status; if [ $? -eq 0 ]; then echo_success else echo_failure fi; echo "" done; } case "$1" in start) start ;; stop) stop ;; status) status ;; restart|reload) stop start ;; *) echo $"Usage: $0 {start|stop|restart|status}" exit 1 esac carbon-1.0.2/distro/redhat/init.d/carbon-cache0000644000000000000000000000427413131244450021110 0ustar rootroot00000000000000#!/bin/bash # chkconfig: - 25 75 # description: carbon-cache # processname: carbon-cache # Source function library. if [ -e /etc/rc.d/init.d/functions ]; then . /etc/rc.d/init.d/functions; fi; CARBON_DAEMON="cache" GRAPHITE_DIR="/opt/graphite" INSTANCES=`grep "^\[${CARBON_DAEMON}" ${GRAPHITE_DIR}/conf/carbon.conf | cut -d \[ -f 2 | cut -d \] -f 1 | cut -d : -f 2` export PYTHONPATH="$GRAPHITE_DIR/lib:$PYTHONPATH" function die { echo $1 exit 1 } start(){ cd $GRAPHITE_DIR; for INSTANCE in ${INSTANCES}; do if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then INSTANCE="a"; fi; echo "Starting carbon-${CARBON_DAEMON}:${INSTANCE}..." 
bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} start; if [ $? -eq 0 ]; then echo_success else echo_failure fi; echo "" done; } stop(){ cd $GRAPHITE_DIR for INSTANCE in ${INSTANCES}; do if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then INSTANCE="a"; fi; echo "Stopping carbon-${CARBON_DAEMON}:${INSTANCE}..." bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} stop if [ `sleep 3; /usr/bin/pgrep -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}" | /usr/bin/wc -l` -gt 0 ]; then echo "Carbon did not stop yet. Sleeping longer, then force killing it..."; sleep 20; /usr/bin/pkill -9 -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}"; fi; if [ $? -eq 0 ]; then echo_success else echo_failure fi; echo "" done; } status(){ cd $GRAPHITE_DIR; for INSTANCE in ${INSTANCES}; do if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then INSTANCE="a"; fi; bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} status; if [ $? -eq 0 ]; then echo_success else echo_failure fi; echo "" done; } case "$1" in start) start ;; stop) stop ;; status) status ;; restart|reload) stop start ;; *) echo $"Usage: $0 {start|stop|restart|status}" exit 1 esac carbon-1.0.2/LICENSE0000644000000000000000000002613613131244450013726 0ustar rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
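Before the library code that follows, a minimal sketch of how the relay rule objects defined in lib/carbon/relayrules.py (immediately below) behave; ExampleRelayRule, the collectd pattern and the destination triple are hypothetical illustrations, not part of this distribution.

import re

class ExampleRelayRule(object):
  # Mirrors RelayRule below: a callable condition, a destination list and an
  # optional continue_matching flag.
  def __init__(self, condition, destinations, continue_matching=False):
    self.condition = condition
    self.destinations = destinations
    self.continue_matching = continue_matching

  def matches(self, metric):
    # Same test RelayRule.matches() performs: truthiness of condition(metric).
    return bool(self.condition(metric))

pattern = re.compile(r'^collectd\.', re.I)  # hypothetical routing pattern
rule = ExampleRelayRule(pattern.search, [('127.0.0.1', 2004, 'a')])
print rule.matches('collectd.web1.load.load')      # True
print rule.matches('carbon.agents.web1.cpuUsage')  # False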
carbon-1.0.2/lib/0000755000000000000000000000000013131244747013470 5ustar rootroot00000000000000carbon-1.0.2/lib/carbon/0000755000000000000000000000000013131244747014734 5ustar rootroot00000000000000carbon-1.0.2/lib/carbon/relayrules.py0000644000000000000000000000425313131244450017470 0ustar rootroot00000000000000import re from carbon.conf import OrderedConfigParser from carbon.util import parseDestinations from carbon.exceptions import CarbonConfigException class RelayRule: def __init__(self, condition, destinations, continue_matching=False): self.condition = condition self.destinations = destinations self.continue_matching = continue_matching def matches(self, metric): return bool(self.condition(metric)) def loadRelayRules(path): rules = [] parser = OrderedConfigParser() if not parser.read(path): raise CarbonConfigException("Could not read rules file %s" % path) defaultRule = None for section in parser.sections(): if not parser.has_option(section, 'destinations'): raise CarbonConfigException("Rules file %s section %s does not define a " "'destinations' list" % (path, section)) destination_strings = parser.get(section, 'destinations').split(',') destinations = parseDestinations(destination_strings) if parser.has_option(section, 'pattern'): if parser.has_option(section, 'default'): raise CarbonConfigException("Section %s contains both 'pattern' and " "'default'. You must use one or the other." % section) pattern = parser.get(section, 'pattern') regex = re.compile(pattern, re.I) continue_matching = False if parser.has_option(section, 'continue'): continue_matching = parser.getboolean(section, 'continue') rule = RelayRule(condition=regex.search, destinations=destinations, continue_matching=continue_matching) rules.append(rule) continue if parser.has_option(section, 'default'): if not parser.getboolean(section, 'default'): continue # just ignore default = false if defaultRule: raise CarbonConfigException("Only one default rule can be specified") defaultRule = RelayRule(condition=lambda metric: True, destinations=destinations) if not defaultRule: raise CarbonConfigException("No default rule defined. You must specify exactly one " "rule with 'default = true' instead of a pattern.") rules.append(defaultRule) return rules carbon-1.0.2/lib/carbon/database.py0000644000000000000000000001423113131244455017047 0ustar rootroot00000000000000"""Copyright 2009 Chris Davis Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.""" import os from os.path import exists, dirname, join, sep from carbon.util import PluginRegistrar from carbon import log class TimeSeriesDatabase(object): "Abstract base class for Carbon database backends." __metaclass__ = PluginRegistrar plugins = {} "List of supported aggregation methods for the database." aggregationMethods = [] def write(self, metric, datapoints): "Persist datapoints in the database for metric." raise NotImplemented() def exists(self, metric): "Return True if the given metric path exists, False otherwise." 
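    # Note: concrete backends defined below (WhisperDatabase when the whisper
    # module imports, CeresDatabase when ceres does) implement these methods;
    # the DATABASE setting (default 'whisper' in conf.py) selects which one is
    # used.  The plugin_name attribute is assumed to be the registration key
    # used by the PluginRegistrar metaclass.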
raise NotImplemented() def create(self, metric, retentions, xfilesfactor, aggregation_method): "Create an entry in the database for metric using options." raise NotImplemented() def getMetadata(self, metric, key): "Lookup metric metadata." raise NotImplemented() def setMetadata(self, metric, key, value): "Modify metric metadata." raise NotImplemented() def getFilesystemPath(self, metric): "Return filesystem path for metric, defaults to None." pass def validateArchiveList(self, archiveList): "Validate that the database can handle the given archiveList." pass try: import whisper except ImportError: pass else: class WhisperDatabase(TimeSeriesDatabase): plugin_name = 'whisper' aggregationMethods = whisper.aggregationMethods def __init__(self, settings): self.data_dir = settings.LOCAL_DATA_DIR self.sparse_create = settings.WHISPER_SPARSE_CREATE self.fallocate_create = settings.WHISPER_FALLOCATE_CREATE if settings.WHISPER_AUTOFLUSH: log.msg("Enabling Whisper autoflush") whisper.AUTOFLUSH = True if settings.WHISPER_FALLOCATE_CREATE: if whisper.CAN_FALLOCATE: log.msg("Enabling Whisper fallocate support") else: log.err("WHISPER_FALLOCATE_CREATE is enabled but linking failed.") if settings.WHISPER_LOCK_WRITES: if whisper.CAN_LOCK: log.msg("Enabling Whisper file locking") whisper.LOCK = True else: log.err("WHISPER_LOCK_WRITES is enabled but import of fcntl module failed.") if settings.WHISPER_FADVISE_RANDOM: try: if whisper.CAN_FADVISE: log.msg("Enabling Whisper fadvise_random support") whisper.FADVISE_RANDOM = True else: log.err("WHISPER_FADVISE_RANDOM is enabled but import of ftools module failed.") except AttributeError: log.err("WHISPER_FADVISE_RANDOM is enabled but skipped because it is not compatible with the version of Whisper.") def write(self, metric, datapoints): path = self.getFilesystemPath(metric) whisper.update_many(path, datapoints) def exists(self, metric): return exists(self.getFilesystemPath(metric)) def create(self, metric, retentions, xfilesfactor, aggregation_method): path = self.getFilesystemPath(metric) directory = dirname(path) try: if not exists(directory): os.makedirs(directory) except OSError, e: log.err("%s" % e) whisper.create(path, retentions, xfilesfactor, aggregation_method, self.sparse_create, self.fallocate_create) def getMetadata(self, metric, key): if key != 'aggregationMethod': raise ValueError("Unsupported metadata key \"%s\"" % key) wsp_path = self.getFilesystemPath(metric) return whisper.info(wsp_path)['aggregationMethod'] def setMetadata(self, metric, key, value): if key != 'aggregationMethod': raise ValueError("Unsupported metadata key \"%s\"" % key) wsp_path = self.getFilesystemPath(metric) return whisper.setAggregationMethod(wsp_path, value) def getFilesystemPath(self, metric): metric_path = metric.replace('.', sep).lstrip(sep) + '.wsp' return join(self.data_dir, metric_path) def validateArchiveList(self, archiveList): try: whisper.validateArchiveList(archiveList) except whisper.InvalidConfiguration, e: raise ValueError("%s" % e) try: import ceres except ImportError: pass else: class CeresDatabase(TimeSeriesDatabase): plugin_name = 'ceres' aggregationMethods = ['average','sum','last','max','min'] def __init__(self, settings): self.data_dir = settings.LOCAL_DATA_DIR ceres.setDefaultNodeCachingBehavior(settings.CERES_NODE_CACHING_BEHAVIOR) ceres.setDefaultSliceCachingBehavior(settings.CERES_SLICE_CACHING_BEHAVIOR) ceres.MAX_SLICE_GAP = int(settings.CERES_MAX_SLICE_GAP) if settings.CERES_LOCK_WRITES: if ceres.CAN_LOCK: log.msg("Enabling Ceres file locking") 
ceres.LOCK_WRITES = True else: log.err("CERES_LOCK_WRITES is enabled but import of fcntl module failed.") self.tree = ceres.CeresTree(self.data_dir) def write(self, metric, datapoints): self.tree.store(metric, datapoints) def exists(self, metric): return self.tree.hasNode(metric) def create(self, metric, retentions, xfilesfactor, aggregation_method): self.tree.createNode(metric, retentions=retentions, timeStep=retentions[0][0], xFilesFactor=xfilesfactor, aggregationMethod=aggregation_method) def getMetadata(self, metric, key): return self.tree.getNode(metric).readMetadata()[key] def setMetadata(self, metric, key, value): node = self.tree.getNode(metric) metadata = node.readMetadata() metadata[key] = value node.writeMetadata(metadata) def getFilesystemPath(self, metric): return self.tree.getFilesystemPath(metric) carbon-1.0.2/lib/carbon/__init__.py0000644000000000000000000000000013131244450017022 0ustar rootroot00000000000000carbon-1.0.2/lib/carbon/routers.py0000644000000000000000000001542113131244455017010 0ustar rootroot00000000000000import imp from carbon.hashing import ConsistentHashRing from carbon.util import PluginRegistrar class DatapointRouter(object): "Abstract base class for datapoint routing logic implementations" __metaclass__ = PluginRegistrar plugins = {} def addDestination(self, destination): "destination is a (host, port, instance) triple" raise NotImplemented() def removeDestination(self, destination): "destination is a (host, port, instance) triple" raise NotImplemented() def getDestinations(self, key): """Generate the destinations where the given routing key should map to. Only destinations which are configured (addDestination has been called for it) may be generated by this method.""" raise NotImplemented() class RelayRulesRouter(DatapointRouter): plugin_name = 'rules' def __init__(self, settings): # We need to import relayrules here to avoid circular dependencies. 
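    # Illustrative only: a hypothetical relay-rules.conf fragment that
    # loadRelayRules() would turn into RelayRule objects might look like
    #
    #   [example]
    #   pattern = ^collectd\.
    #   destinations = 127.0.0.1:2004:a
    #
    #   [default]
    #   default = true
    #   destinations = 127.0.0.1:2104:b
    #
    # Exactly one section must set default = true; sections with a pattern are
    # matched case-insensitively with re.search against each metric name.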
from carbon.relayrules import loadRelayRules rules_path = settings["relay-rules"] self.rules_path = rules_path self.rules = loadRelayRules(rules_path) self.destinations = set() def addDestination(self, destination): self.destinations.add(destination) def removeDestination(self, destination): self.destinations.discard(destination) def getDestinations(self, key): for rule in self.rules: if rule.matches(key): for destination in rule.destinations: if destination in self.destinations: yield destination if not rule.continue_matching: return class ConsistentHashingRouter(DatapointRouter): plugin_name = 'consistent-hashing' def __init__(self, settings): replication_factor = settings.REPLICATION_FACTOR diverse_replicas = settings.DIVERSE_REPLICAS self.replication_factor = int(replication_factor) self.diverse_replicas = diverse_replicas self.instance_ports = {} # { (server, instance) : port } self.ring = ConsistentHashRing([]) def addDestination(self, destination): (server, port, instance) = destination if (server, instance) in self.instance_ports: raise Exception("destination instance (%s, %s) already configured" % (server, instance)) self.instance_ports[(server, instance)] = port self.ring.add_node((server, instance)) def removeDestination(self, destination): (server, port, instance) = destination if (server, instance) not in self.instance_ports: raise Exception("destination instance (%s, %s) not configured" % (server, instance)) del self.instance_ports[(server, instance)] self.ring.remove_node((server, instance)) def getDestinations(self, metric): key = self.getKey(metric) if self.diverse_replicas: used_servers = set() for (server, instance) in self.ring.get_nodes(key): if server in used_servers: continue else: used_servers.add(server) port = self.instance_ports[(server, instance)] yield (server, port, instance) if len(used_servers) >= self.replication_factor: return else: for (count, node) in enumerate(self.ring.get_nodes(key)): if count == self.replication_factor: return (server, instance) = node port = self.instance_ports[(server, instance)] yield (server, port, instance) def getKey(self, metric): return metric def setKeyFunction(self, func): self.getKey = func def setKeyFunctionFromModule(self, keyfunc_spec): module_path, func_name = keyfunc_spec.rsplit(':', 1) module_file = open(module_path, 'U') description = ('.py', 'U', imp.PY_SOURCE) module = imp.load_module('keyfunc_module', module_file, module_path, description) keyfunc = getattr(module, func_name) self.setKeyFunction(keyfunc) class AggregatedConsistentHashingRouter(DatapointRouter): plugin_name = 'aggregated-consistent-hashing' def __init__(self, settings): from carbon.aggregator.rules import RuleManager aggregation_rules_path = settings["aggregation-rules"] if aggregation_rules_path: RuleManager.read_from(aggregation_rules_path) self.hash_router = ConsistentHashingRouter(settings) self.agg_rules_manager = RuleManager def addDestination(self, destination): self.hash_router.addDestination(destination) def removeDestination(self, destination): self.hash_router.removeDestination(destination) def getDestinations(self, key): # resolve metric to aggregate forms resolved_metrics = [] for rule in self.agg_rules_manager.rules: aggregate_metric = rule.get_aggregate_metric(key) if aggregate_metric is None: continue else: resolved_metrics.append(aggregate_metric) # if the metric will not be aggregated, send it raw # (will pass through aggregation) if len(resolved_metrics) == 0: resolved_metrics.append(key) # get consistent hashing destinations 
based on aggregate forms destinations = set() for resolved_metric in resolved_metrics: for destination in self.hash_router.getDestinations(resolved_metric): destinations.add(destination) for destination in destinations: yield destination try: import mmh3 except ImportError: pass else: class FastHashRing(object): """A very fast hash 'ring'. Instead of trying to avoid rebalancing data when changing the list of nodes we try to making routing as fast as we can. It's good enough because the current rebalancing tools performances depend on the total number of metrics and not the number of metrics to rebalance. """ def __init__(self): self.nodes = set() self.sorted_nodes = [] def _hash(self, key): return mmh3.hash(key) def _update_nodes(self): self.sorted_nodes = sorted( [(self._hash(str(n)), n) for n in self.nodes], key=lambda v: v[0] ) def add_node(self, node): self.nodes.add(node) self._update_nodes() def remove_node(self, node): self.nodes.discard(node) self._update_nodes() def get_nodes(self, key): seed = self._hash(key) % len(self.nodes) for n in xrange(seed, seed + len(self.nodes)): yield self.sorted_nodes[n % len(self.sorted_nodes)][1] class FastHashingRouter(ConsistentHashingRouter): """Same as ConsistentHashingRouter but using FastHashRing.""" plugin_name = 'fast-hashing' def __init__(self, settings): super(FastHashingRouter, self).__init__(settings) self.ring = FastHashRing() class FastAggregatedHashingRouter(AggregatedConsistentHashingRouter): """Same as AggregatedConsistentHashingRouter but using FastHashRing.""" plugin_name = 'fast-aggregated-hashing' def __init__(self, settings): super(FastAggregatedHashingRouter, self).__init__(settings) self.hash_router.ring = FastHashRing() carbon-1.0.2/lib/carbon/hashing.py0000644000000000000000000000503213131244455016723 0ustar rootroot00000000000000try: from hashlib import md5 except ImportError: from md5 import md5 import bisect try: import pyhash hasher = pyhash.fnv1a_32() def fnv32a(string, seed=0x811c9dc5): return hasher(string, seed=seed) except ImportError: def fnv32a(string, seed=0x811c9dc5): """ FNV-1a Hash (http://isthe.com/chongo/tech/comp/fnv/) in Python. 
Taken from https://gist.github.com/vaiorabbit/5670985 """ hval = seed fnv_32_prime = 0x01000193 uint32_max = 2 ** 32 for s in string: hval = hval ^ ord(s) hval = (hval * fnv_32_prime) % uint32_max return hval class ConsistentHashRing: def __init__(self, nodes, replica_count=100, hash_type='carbon_ch'): self.ring = [] self.nodes = set() self.replica_count = replica_count self.hash_type = hash_type for node in nodes: self.add_node(node) def compute_ring_position(self, key): if self.hash_type == 'fnv1a_ch': big_hash = '{:x}'.format(int(fnv32a( str(key) ))) small_hash = int(big_hash[:4], 16) ^ int(big_hash[4:], 16) else: big_hash = md5(str(key)).hexdigest() small_hash = int(big_hash[:4], 16) return small_hash def add_node(self, node): self.nodes.add(node) for i in range(self.replica_count): if self.hash_type == 'fnv1a_ch': replica_key = "%d-%s" % (i, node[1]) else: replica_key = "%s:%d" % (node, i) position = self.compute_ring_position(replica_key) while position in [r[0] for r in self.ring]: position = position + 1 entry = (position, node) bisect.insort(self.ring, entry) def remove_node(self, node): self.nodes.discard(node) self.ring = [entry for entry in self.ring if entry[1] != node] def get_node(self, key): assert self.ring node = None node_iter = self.get_nodes(key) node = node_iter.next() node_iter.close() return node def get_nodes(self, key): assert self.ring if len(self.nodes) == 1: # short circuit in simple 1-node case for node in self.nodes: yield node return nodes = set() position = self.compute_ring_position(key) search_entry = (position, None) index = bisect.bisect_left(self.ring, search_entry) % len(self.ring) last_index = (index - 1) % len(self.ring) while len(nodes) < len(self.nodes) and index != last_index: next_entry = self.ring[index] (position, next_node) = next_entry if next_node not in nodes: nodes.add(next_node) yield next_node index = (index + 1) % len(self.ring) carbon-1.0.2/lib/carbon/exceptions.py0000644000000000000000000000014713131244450017460 0ustar rootroot00000000000000class CarbonConfigException(Exception): """Raised when a carbon daemon is improperly configured""" carbon-1.0.2/lib/carbon/amqp0-8.xml0000644000000000000000000007547713131244450016654 0ustar rootroot00000000000000 carbon-1.0.2/lib/carbon/protocols.py0000644000000000000000000001770113131244455017334 0ustar rootroot00000000000000import time from twisted.internet.protocol import ServerFactory, DatagramProtocol from twisted.application.internet import TCPServer, UDPServer from twisted.internet.error import ConnectionDone from twisted.protocols.basic import LineOnlyReceiver, Int32StringReceiver from twisted.protocols.policies import TimeoutMixin from carbon import log, events, state, management from carbon.conf import settings from carbon.regexlist import WhiteList, BlackList from carbon.util import pickle, get_unpickler from carbon.util import PluginRegistrar class CarbonReceiverFactory(ServerFactory): def buildProtocol(self, addr): from carbon.conf import settings # Don't establish the connection if we have reached the limit. 
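    # MAX_RECEIVER_CONNECTIONS defaults to float('inf') (see the defaults dict
    # in conf.py), so nothing is refused unless the operator lowers it;
    # returning None from buildProtocol causes Twisted to drop the connection.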
if len(state.connectedMetricReceiverProtocols) < settings.MAX_RECEIVER_CONNECTIONS: return ServerFactory.buildProtocol(self, addr) else: return None class CarbonServerProtocol(object): __metaclass__ = PluginRegistrar plugins = {} @classmethod def build(cls, root_service): plugin_up = cls.plugin_name.upper() interface = settings.get('%s_RECEIVER_INTERFACE' % plugin_up, None) port = int(settings.get('%s_RECEIVER_PORT' % plugin_up, 0)) protocol = cls if not port: return if hasattr(protocol, 'datagramReceived'): service = UDPServer(port, protocol(), interface=interface) service.setServiceParent(root_service) else: factory = CarbonReceiverFactory() factory.protocol = protocol service = TCPServer(port, factory, interface=interface) service.setServiceParent(root_service) class MetricReceiver(CarbonServerProtocol, TimeoutMixin): """ Base class for all metric receiving protocols, handles flow control events and connection state logging. """ def connectionMade(self): self.setTimeout(settings.METRIC_CLIENT_IDLE_TIMEOUT) self.peerName = self.getPeerName() if settings.LOG_LISTENER_CONN_SUCCESS: log.listener("%s connection with %s established" % ( self.__class__.__name__, self.peerName)) if state.metricReceiversPaused: self.pauseReceiving() state.connectedMetricReceiverProtocols.add(self) if settings.USE_FLOW_CONTROL: events.pauseReceivingMetrics.addHandler(self.pauseReceiving) events.resumeReceivingMetrics.addHandler(self.resumeReceiving) def getPeerName(self): if hasattr(self.transport, 'getPeer'): peer = self.transport.getPeer() return "%s:%d" % (peer.host, peer.port) else: return "peer" def pauseReceiving(self): self.transport.pauseProducing() def resumeReceiving(self): self.transport.resumeProducing() def connectionLost(self, reason): if reason.check(ConnectionDone): if settings.LOG_LISTENER_CONN_SUCCESS: log.listener("%s connection with %s closed cleanly" % (self.__class__.__name__, self.peerName)) else: log.listener("%s connection with %s lost: %s" % (self.__class__.__name__, self.peerName, reason.value)) state.connectedMetricReceiverProtocols.remove(self) if settings.USE_FLOW_CONTROL: events.pauseReceivingMetrics.removeHandler(self.pauseReceiving) events.resumeReceivingMetrics.removeHandler(self.resumeReceiving) def metricReceived(self, metric, datapoint): if BlackList and metric in BlackList: instrumentation.increment('blacklistMatches') return if WhiteList and metric not in WhiteList: instrumentation.increment('whitelistRejects') return if datapoint[1] != datapoint[1]: # filter out NaN values return if int(datapoint[0]) == -1: # use current time if none given: https://github.com/graphite-project/carbon/issues/54 datapoint = (time.time(), datapoint[1]) res = settings.MIN_TIMESTAMP_RESOLUTION if res: datapoint = (int(datapoint[0]) // res * res, datapoint[1]) events.metricReceived(metric, datapoint) self.resetTimeout() class MetricLineReceiver(MetricReceiver, LineOnlyReceiver): plugin_name = "line" delimiter = '\n' def lineReceived(self, line): try: metric, value, timestamp = line.strip().split() datapoint = (float(timestamp), float(value)) except ValueError: if len(line) > 400: line = line[:400] + '...' 
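      # Each line is expected to be "<metric path> <value> <timestamp>"; any
      # other shape (wrong field count, non-numeric value or timestamp) raises
      # ValueError above and is logged and dropped here (e.g. a hypothetical
      # line "foo.bar 1.5" with no timestamp ends up in this branch).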
log.listener('invalid line received from client %s, ignoring [%s]' % (self.peerName, line.strip().encode('string_escape'))) return self.metricReceived(metric, datapoint) class MetricDatagramReceiver(MetricReceiver, DatagramProtocol): plugin_name = "udp" @classmethod def build(cls, root_service): if not settings.ENABLE_UDP_LISTENER: return super(MetricDatagramReceiver, cls).build(root_service) def datagramReceived(self, data, (host, port)): for line in data.splitlines(): try: metric, value, timestamp = line.strip().split() datapoint = (float(timestamp), float(value)) self.metricReceived(metric, datapoint) except ValueError: if len(line) > 400: line = line[:400] + '...' log.listener('invalid line received from %s, ignoring [%s]' % (host, line.strip().encode('string_escape'))) class MetricPickleReceiver(MetricReceiver, Int32StringReceiver): plugin_name = "pickle" MAX_LENGTH = 2 ** 20 def connectionMade(self): MetricReceiver.connectionMade(self) self.unpickler = get_unpickler(insecure=settings.USE_INSECURE_UNPICKLER) def stringReceived(self, data): try: datapoints = self.unpickler.loads(data) except pickle.UnpicklingError: log.listener('invalid pickle received from %s, ignoring' % self.peerName) return for raw in datapoints: try: (metric, (value, timestamp)) = raw except Exception, e: log.listener('Error decoding pickle: %s' % e) try: datapoint = (float(value), float(timestamp)) # force proper types except ValueError: continue self.metricReceived(metric, datapoint) class CacheManagementHandler(Int32StringReceiver): MAX_LENGTH = 1024 ** 3 # 1mb def connectionMade(self): peer = self.transport.getPeer() self.peerAddr = "%s:%d" % (peer.host, peer.port) log.query("%s connected" % self.peerAddr) self.unpickler = get_unpickler(insecure=settings.USE_INSECURE_UNPICKLER) def connectionLost(self, reason): if reason.check(ConnectionDone): log.query("%s disconnected" % self.peerAddr) else: log.query("%s connection lost: %s" % (self.peerAddr, reason.value)) def stringReceived(self, rawRequest): request = self.unpickler.loads(rawRequest) cache = MetricCache() if request['type'] == 'cache-query': metric = request['metric'] datapoints = cache.get(metric, {}).items() result = dict(datapoints=datapoints) if settings.LOG_CACHE_HITS: log.query('[%s] cache query for \"%s\" returned %d values' % (self.peerAddr, metric, len(datapoints))) instrumentation.increment('cacheQueries') elif request['type'] == 'cache-query-bulk': datapointsByMetric = {} metrics = request['metrics'] for metric in metrics: datapointsByMetric[metric] = cache.get(metric, {}).items() result = dict(datapointsByMetric=datapointsByMetric) if settings.LOG_CACHE_HITS: log.query('[%s] cache query bulk for \"%d\" metrics returned %d values' % (self.peerAddr, len(metrics), sum([len(datapoints) for datapoints in datapointsByMetric.values()]))) instrumentation.increment('cacheBulkQueries') instrumentation.append('cacheBulkQuerySize', len(metrics)) elif request['type'] == 'get-metadata': result = management.getMetadata(request['metric'], request['key']) elif request['type'] == 'set-metadata': result = management.setMetadata(request['metric'], request['key'], request['value']) else: result = dict(error="Invalid request type \"%s\"" % request['type']) response = pickle.dumps(result, protocol=-1) self.sendString(response) # Avoid import circularities from carbon.cache import MetricCache from carbon import instrumentation carbon-1.0.2/lib/carbon/log.py0000644000000000000000000001047013131244455016065 0ustar rootroot00000000000000import os import time from sys 
import stdout, stderr from zope.interface import implements from twisted.python.log import startLoggingWithObserver, textFromEventDict, msg, err, ILogObserver from twisted.python.syslog import SyslogObserver from twisted.python.logfile import DailyLogFile class CarbonLogFile(DailyLogFile): """Overridden to support logrotate.d""" def __init__(self, *args, **kwargs): DailyLogFile.__init__(self, *args, **kwargs) # avoid circular dependencies from carbon.conf import settings self.enableRotation = settings.ENABLE_LOGROTATION def _openFile(self): """ Fix Umask Issue https://twistedmatrix.com/trac/ticket/7026 """ openMode = self.defaultMode or 0777 self._file = os.fdopen(os.open( self.path, os.O_CREAT|os.O_RDWR, openMode), 'r+', 1) self.closed = False # Try our best to update permissions for files which already exist. if self.defaultMode: try: os.chmod(self.path, self.defaultMode) except OSError: pass # Seek is needed for uniformity of stream positioning # for read and write between Linux and BSD systems due # to differences in fopen() between operating systems. self._file.seek(0, os.SEEK_END) self.lastDate = self.toDate(os.stat(self.path)[8]) def shouldRotate(self): if self.enableRotation: return DailyLogFile.shouldRotate(self) else: return False def write(self, data): if not self.enableRotation: if not os.path.exists(self.path): self.reopen() else: path_stat = os.stat(self.path) fd_stat = os.fstat(self._file.fileno()) if not (path_stat.st_ino == fd_stat.st_ino and path_stat.st_dev == fd_stat.st_dev): self.reopen() DailyLogFile.write(self, data) # Backport from twisted >= 10 def reopen(self): self.close() self._openFile() class CarbonLogObserver(object): implements(ILogObserver) def log_to_dir(self, logdir): self.logdir = logdir self.console_logfile = CarbonLogFile('console.log', logdir) self.custom_logs = {} self.observer = self.logdir_observer def log_to_syslog(self, prefix): observer = SyslogObserver(prefix).emit def syslog_observer(event): event["system"] = event.get("type", "console") observer(event) self.observer = syslog_observer def __call__(self, event): return self.observer(event) def stdout_observer(self, event): stdout.write(formatEvent(event, includeType=True) + '\n') stdout.flush() def logdir_observer(self, event): message = formatEvent(event) log_type = event.get('type') if log_type is not None and log_type not in self.custom_logs: self.custom_logs[log_type] = CarbonLogFile(log_type + '.log', self.logdir) logfile = self.custom_logs.get(log_type, self.console_logfile) logfile.write(message + '\n') logfile.flush() # Default to stdout observer = stdout_observer carbonLogObserver = CarbonLogObserver() def formatEvent(event, includeType=False): event['isError'] = 'failure' in event message = textFromEventDict(event) if includeType: typeTag = '[%s] ' % event.get('type', 'console') else: typeTag = '' timestamp = time.strftime("%d/%m/%Y %H:%M:%S") return "%s :: %s%s" % (timestamp, typeTag, message) logToDir = carbonLogObserver.log_to_dir logToSyslog = carbonLogObserver.log_to_syslog def logToStdout(): startLoggingWithObserver(carbonLogObserver) def cache(message, **context): context['type'] = 'cache' msg(message, **context) def clients(message, **context): context['type'] = 'clients' msg(message, **context) def creates(message, **context): context['type'] = 'creates' msg(message, **context) def updates(message, **context): context['type'] = 'updates' msg(message, **context) def listener(message, **context): context['type'] = 'listener' msg(message, **context) def relay(message, 
**context): context['type'] = 'relay' msg(message, **context) def aggregator(message, **context): context['type'] = 'aggregator' msg(message, **context) def query(message, **context): context['type'] = 'query' msg(message, **context) def debug(message, **context): if debugEnabled: msg(message, **context) debugEnabled = False def setDebugEnabled(enabled): global debugEnabled debugEnabled = enabled carbon-1.0.2/lib/carbon/regexlist.py0000644000000000000000000000273413131244455017316 0ustar rootroot00000000000000import time import re import os.path from carbon import log from twisted.internet.task import LoopingCall class RegexList: """ Maintain a list of regex for matching whitelist and blacklist """ def __init__(self): self.regex_list = [] self.list_file = None self.read_task = LoopingCall(self.read_list) self.rules_last_read = 0.0 def read_from(self, list_file): self.list_file = list_file self.read_list() self.read_task.start(10, now=False) def read_list(self): # Clear rules and move on if file isn't there if not os.path.exists(self.list_file): self.regex_list = [] return try: mtime = os.path.getmtime(self.list_file) except OSError: log.err("Failed to get mtime of %s" % self.list_file) return if mtime <= self.rules_last_read: return # Begin read new_regex_list = [] for line in open(self.list_file): pattern = line.strip() if line.startswith('#') or not pattern: continue try: new_regex_list.append(re.compile(pattern)) except re.error: log.err("Failed to parse '%s' in '%s'. Ignoring line" % (pattern, self.list_file)) self.regex_list = new_regex_list self.rules_last_read = mtime def __contains__(self, value): for regex in self.regex_list: if regex.search(value): return True return False def __nonzero__(self): return bool(self.regex_list) WhiteList = RegexList() BlackList = RegexList() carbon-1.0.2/lib/carbon/instrumentation.py0000644000000000000000000001510113131244455020543 0ustar rootroot00000000000000import os import time import socket from resource import getrusage, RUSAGE_SELF from twisted.application.service import Service from twisted.internet.task import LoopingCall from carbon.conf import settings stats = {} prior_stats = {} HOSTNAME = socket.gethostname().replace('.', '_') PAGESIZE = os.sysconf('SC_PAGESIZE') rusage = getrusage(RUSAGE_SELF) lastUsage = rusage.ru_utime + rusage.ru_stime lastUsageTime = time.time() # NOTE: Referencing settings in this *top level scope* will # give you *defaults* only. Probably not what you wanted. # TODO(chrismd) refactor the graphite metrics hierarchy to be cleaner, # more consistent, and make room for frontend metrics. #metric_prefix = "Graphite.backend.%(program)s.%(instance)s." 
% settings def increment(stat, increase=1): try: stats[stat] += increase except KeyError: stats[stat] = increase def max(stat, newval): try: if stats[stat] < newval: stats[stat] = newval except KeyError: stats[stat] = newval def append(stat, value): try: stats[stat].append(value) except KeyError: stats[stat] = [value] def getCpuUsage(): global lastUsage, lastUsageTime rusage = getrusage(RUSAGE_SELF) currentUsage = rusage.ru_utime + rusage.ru_stime currentTime = time.time() usageDiff = currentUsage - lastUsage timeDiff = currentTime - lastUsageTime if timeDiff == 0: # shouldn't be possible, but I've actually seen a ZeroDivisionError from this timeDiff = 0.000001 cpuUsagePercent = (usageDiff / timeDiff) * 100.0 lastUsage = currentUsage lastUsageTime = currentTime return cpuUsagePercent def getMemUsage(): rss_pages = int(open('/proc/self/statm').read().split()[1]) return rss_pages * PAGESIZE def recordMetrics(): global lastUsage global prior_stats myStats = stats.copy() myPriorStats = {} stats.clear() # cache metrics if settings.program == 'carbon-cache': record = cache_record updateTimes = myStats.get('updateTimes', []) committedPoints = myStats.get('committedPoints', 0) creates = myStats.get('creates', 0) droppedCreates = myStats.get('droppedCreates', 0) errors = myStats.get('errors', 0) cacheQueries = myStats.get('cacheQueries', 0) cacheBulkQueries = myStats.get('cacheBulkQueries', 0) cacheOverflow = myStats.get('cache.overflow', 0) cacheBulkQuerySizes = myStats.get('cacheBulkQuerySize', []) # Calculate cache-data-structure-derived metrics prior to storing anything # in the cache itself -- which would otherwise affect said metrics. cache_size = cache.MetricCache().size cache_queues = len(cache.MetricCache()) record('cache.size', cache_size) record('cache.queues', cache_queues) if updateTimes: avgUpdateTime = sum(updateTimes) / len(updateTimes) record('avgUpdateTime', avgUpdateTime) if committedPoints: pointsPerUpdate = float(committedPoints) / len(updateTimes) record('pointsPerUpdate', pointsPerUpdate) if cacheBulkQuerySizes: avgBulkSize = sum(cacheBulkQuerySizes) / len(cacheBulkQuerySizes) record('cache.bulk_queries_average_size', avgBulkSize) record('updateOperations', len(updateTimes)) record('committedPoints', committedPoints) record('creates', creates) record('droppedCreates', droppedCreates) record('errors', errors) record('cache.queries', cacheQueries) record('cache.bulk_queries', cacheBulkQueries) record('cache.overflow', cacheOverflow) # aggregator metrics elif settings.program == 'carbon-aggregator': record = aggregator_record record('allocatedBuffers', len(BufferManager)) record('bufferedDatapoints', sum([b.size for b in BufferManager.buffers.values()])) record('aggregateDatapointsSent', myStats.get('aggregateDatapointsSent', 0)) # relay metrics else: record = relay_record # shared relay stats for relays & aggregators if settings.program in ['carbon-aggregator', 'carbon-relay']: prefix = 'destinations.' relay_stats = [(k,v) for (k,v) in myStats.items() if k.startswith(prefix)] for stat_name, stat_value in relay_stats: record(stat_name, stat_value) # Preserve the count of sent metrics so that the ratio of # received : sent can be checked per-relay to determine the # health of the destination. 
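      # Assumption for illustration: these per-destination keys look like
      # 'destinations.<host>:<port>:<instance>.sent' (e.g. a hypothetical
      # 'destinations.127.0.0.1:2004:a.sent'); the value from this period is
      # kept in prior_stats so the received:sent ratio can be checked later.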
if stat_name.endswith('.sent'): myPriorStats[stat_name] = stat_value # common metrics record('activeConnections', len(state.connectedMetricReceiverProtocols)) record('metricsReceived', myStats.get('metricsReceived', 0)) record('blacklistMatches', myStats.get('blacklistMatches', 0)) record('whitelistRejects', myStats.get('whitelistRejects', 0)) record('cpuUsage', getCpuUsage()) # And here preserve count of messages received in the prior periiod myPriorStats['metricsReceived'] = myStats.get('metricsReceived', 0) prior_stats.clear() prior_stats.update(myPriorStats) try: # This only works on Linux record('memUsage', getMemUsage()) except Exception: pass def cache_record(metric, value): prefix = settings.CARBON_METRIC_PREFIX if settings.instance is None: fullMetric = '%s.agents.%s.%s' % (prefix, HOSTNAME, metric) else: fullMetric = '%s.agents.%s-%s.%s' % (prefix, HOSTNAME, settings.instance, metric) datapoint = (time.time(), value) cache.MetricCache().store(fullMetric, datapoint) def relay_record(metric, value): prefix = settings.CARBON_METRIC_PREFIX if settings.instance is None: fullMetric = '%s.relays.%s.%s' % (prefix, HOSTNAME, metric) else: fullMetric = '%s.relays.%s-%s.%s' % (prefix, HOSTNAME, settings.instance, metric) datapoint = (time.time(), value) events.metricGenerated(fullMetric, datapoint) def aggregator_record(metric, value): prefix = settings.CARBON_METRIC_PREFIX if settings.instance is None: fullMetric = '%s.aggregator.%s.%s' % (prefix, HOSTNAME, metric) else: fullMetric = '%s.aggregator.%s-%s.%s' % (prefix, HOSTNAME, settings.instance, metric) datapoint = (time.time(), value) events.metricGenerated(fullMetric, datapoint) class InstrumentationService(Service): def __init__(self): self.record_task = LoopingCall(recordMetrics) def startService(self): if settings.CARBON_METRIC_INTERVAL > 0: self.record_task.start(settings.CARBON_METRIC_INTERVAL, False) Service.startService(self) def stopService(self): if settings.CARBON_METRIC_INTERVAL > 0: self.record_task.stop() Service.stopService(self) # Avoid import circularities from carbon import state, events, cache from carbon.aggregator.buffers import BufferManager carbon-1.0.2/lib/carbon/state.py0000644000000000000000000000045213131244450016416 0ustar rootroot00000000000000__doc__ = """ This module exists for the purpose of tracking global state used across several modules. 
""" metricReceiversPaused = False cacheTooFull = False client_manager = None connectedMetricReceiverProtocols = set() pipeline_processors = [] pipeline_processors_generated = [] database = None carbon-1.0.2/lib/carbon/aggregator/0000755000000000000000000000000013131244747017056 5ustar rootroot00000000000000carbon-1.0.2/lib/carbon/aggregator/buffers.py0000644000000000000000000000610513131244455021062 0ustar rootroot00000000000000import time from twisted.internet.task import LoopingCall from carbon.conf import settings from carbon import log class BufferManager: def __init__(self): self.buffers = {} def __len__(self): return len(self.buffers) def get_buffer(self, metric_path): if metric_path not in self.buffers: log.aggregator("Allocating new metric buffer for %s" % metric_path) self.buffers[metric_path] = MetricBuffer(metric_path) return self.buffers[metric_path] def clear(self): for buffer in self.buffers.values(): buffer.close() self.buffers.clear() class MetricBuffer: __slots__ = ('metric_path', 'interval_buffers', 'compute_task', 'configured', 'aggregation_frequency', 'aggregation_func') def __init__(self, metric_path): self.metric_path = metric_path self.interval_buffers = {} self.compute_task = None self.configured = False self.aggregation_frequency = None self.aggregation_func = None def input(self, datapoint): (timestamp, value) = datapoint interval = timestamp - (timestamp % self.aggregation_frequency) if interval in self.interval_buffers: buffer = self.interval_buffers[interval] else: buffer = self.interval_buffers[interval] = IntervalBuffer(interval) buffer.input(datapoint) def configure_aggregation(self, frequency, func): self.aggregation_frequency = int(frequency) self.aggregation_func = func self.compute_task = LoopingCall(self.compute_value) compute_frequency = min(settings['WRITE_BACK_FREQUENCY'], frequency) or frequency self.compute_task.start(compute_frequency, now=False) self.configured = True def compute_value(self): now = int( time.time() ) current_interval = now - (now % self.aggregation_frequency) age_threshold = current_interval - (settings['MAX_AGGREGATION_INTERVALS'] * self.aggregation_frequency) for buffer in self.interval_buffers.values(): if buffer.active: value = self.aggregation_func(buffer.values) datapoint = (buffer.interval, value) state.events.metricGenerated(self.metric_path, datapoint) state.instrumentation.increment('aggregateDatapointsSent') buffer.mark_inactive() if buffer.interval < age_threshold: del self.interval_buffers[buffer.interval] if not self.interval_buffers: self.close() self.configured = False del BufferManager.buffers[self.metric_path] def close(self): if self.compute_task and self.compute_task.running: self.compute_task.stop() @property def size(self): return sum([len(buf.values) for buf in self.interval_buffers.values()]) class IntervalBuffer: __slots__ = ('interval', 'values', 'active') def __init__(self, interval): self.interval = interval self.values = [] self.active = True def input(self, datapoint): self.values.append( datapoint[1] ) self.active = True def mark_inactive(self): self.active = False # Shared importable singleton BufferManager = BufferManager() # Avoid import circularity from carbon import state carbon-1.0.2/lib/carbon/aggregator/rules.py0000644000000000000000000001030013131244455020550 0ustar rootroot00000000000000import time import re from os.path import exists, getmtime from twisted.internet.task import LoopingCall from carbon import log from carbon.aggregator.buffers import BufferManager class RuleManager: def 
__init__(self): self.rules = [] self.rules_file = None self.read_task = LoopingCall(self.read_rules) self.rules_last_read = 0.0 def clear(self): self.rules = [] def read_from(self, rules_file): self.rules_file = rules_file self.read_rules() self.read_task.start(10, now=False) def read_rules(self): if not exists(self.rules_file): self.clear() return # Only read if the rules file has been modified try: mtime = getmtime(self.rules_file) except OSError: log.err("Failed to get mtime of %s" % self.rules_file) return if mtime <= self.rules_last_read: return # Read new rules log.aggregator("reading new aggregation rules from %s" % self.rules_file) new_rules = [] for line in open(self.rules_file): line = line.strip() if line.startswith('#') or not line: continue rule = self.parse_definition(line) new_rules.append(rule) log.aggregator("clearing aggregation buffers") BufferManager.clear() self.rules = new_rules self.rules_last_read = mtime def parse_definition(self, line): try: left_side, right_side = line.split('=', 1) output_pattern, frequency = left_side.split() method, input_pattern = right_side.split() frequency = int( frequency.lstrip('(').rstrip(')') ) return AggregationRule(input_pattern, output_pattern, method, frequency) except ValueError: log.err("Failed to parse rule in %s, line: %s" % (self.rules_file, line)) raise class AggregationRule: def __init__(self, input_pattern, output_pattern, method, frequency): self.input_pattern = input_pattern self.output_pattern = output_pattern self.method = method self.frequency = int(frequency) if method not in AGGREGATION_METHODS: raise ValueError("Invalid aggregation method '%s'" % method) self.aggregation_func = AGGREGATION_METHODS[method] self.build_regex() self.build_template() self.cache = {} def get_aggregate_metric(self, metric_path): if metric_path in self.cache: try: return self.cache[metric_path] except KeyError: # The value can expire at any time, so we need to catch this. 
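    # Illustrative (hypothetical) example of what this method computes: given
    # the aggregation rule
    #   <env>.applications.<app>.all.requests (60) = sum <env>.applications.<app>.*.requests
    # an input metric 'prod.applications.web.host1.requests' matches the built
    # regex with env='prod', app='web', and the output template yields the
    # aggregate metric 'prod.applications.web.all.requests'.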
pass match = self.regex.match(metric_path) result = None if match: extracted_fields = match.groupdict() try: result = self.output_template % extracted_fields except TypeError: log.err("Failed to interpolate template %s with fields %s" % (self.output_template, extracted_fields)) if result: self.cache[metric_path] = result return result def build_regex(self): input_pattern_parts = self.input_pattern.split('.') regex_pattern_parts = [] for input_part in input_pattern_parts: if '<<' in input_part and '>>' in input_part: i = input_part.find('<<') j = input_part.find('>>') pre = input_part[:i] post = input_part[j+2:] field_name = input_part[i+2:j] regex_part = '%s(?P<%s>.+)%s' % (pre, field_name, post) else: i = input_part.find('<') j = input_part.find('>') if i > -1 and j > i: pre = input_part[:i] post = input_part[j+1:] field_name = input_part[i+1:j] regex_part = '%s(?P<%s>[^.]+)%s' % (pre, field_name, post) elif input_part == '*': regex_part = '[^.]+' else: regex_part = input_part.replace('*', '[^.]*') regex_pattern_parts.append(regex_part) regex_pattern = '\\.'.join(regex_pattern_parts) + '$' self.regex = re.compile(regex_pattern) def build_template(self): self.output_template = self.output_pattern.replace('<', '%(').replace('>', ')s') def avg(values): if values: return float( sum(values) ) / len(values) def count(values): if values: return len(values) AGGREGATION_METHODS = { 'sum' : sum, 'avg' : avg, 'min' : min, 'max' : max, 'count' : count } # Importable singleton RuleManager = RuleManager() carbon-1.0.2/lib/carbon/aggregator/__init__.py0000644000000000000000000000000013131244450021144 0ustar rootroot00000000000000carbon-1.0.2/lib/carbon/aggregator/processor.py0000644000000000000000000000250613131244450021441 0ustar rootroot00000000000000from carbon.aggregator.rules import RuleManager from carbon.aggregator.buffers import BufferManager from carbon.instrumentation import increment from carbon.pipeline import Processor from carbon.rewrite import PRE, POST, RewriteRuleManager from carbon.conf import settings from carbon import log class AggregationProcessor(Processor): plugin_name = 'aggregate' def process(self, metric, datapoint): increment('datapointsReceived') for rule in RewriteRuleManager.rules(PRE): metric = rule.apply(metric) aggregate_metrics = set() for rule in RuleManager.rules: aggregate_metric = rule.get_aggregate_metric(metric) if aggregate_metric is None: continue else: aggregate_metrics.add(aggregate_metric) values_buffer = BufferManager.get_buffer(aggregate_metric) if not values_buffer.configured: values_buffer.configure_aggregation(rule.frequency, rule.aggregation_func) values_buffer.input(datapoint) for rule in RewriteRuleManager.rules(POST): metric = rule.apply(metric) if settings.FORWARD_ALL and metric not in aggregate_metrics: if settings.LOG_AGGREGATOR_MISSES and len(aggregate_metrics) == 0: log.msg("Couldn't match metric %s with any aggregation rule. Passing on un-aggregated." 
% metric) yield (metric, datapoint) carbon-1.0.2/lib/carbon/management.py0000644000000000000000000000075113131244450017414 0ustar rootroot00000000000000import traceback from carbon import log, state def getMetadata(metric, key): try: value = state.database.getMetadata(metric, key) return dict(value=value) except Exception: log.err() return dict(error=traceback.format_exc()) def setMetadata(metric, key, value): try: old_value = state.database.setMetadata(metric, key, value) return dict(old_value=old_value, new_value=value) except Exception: log.err() return dict(error=traceback.format_exc()) carbon-1.0.2/lib/carbon/storage.py0000644000000000000000000001062113131244455016746 0ustar rootroot00000000000000"""Copyright 2009 Chris Davis Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.""" import os import re from os.path import join, exists from carbon.conf import OrderedConfigParser, settings from carbon.exceptions import CarbonConfigException from carbon.util import pickle, parseRetentionDef from carbon import log, state STORAGE_SCHEMAS_CONFIG = join(settings.CONF_DIR, 'storage-schemas.conf') STORAGE_AGGREGATION_CONFIG = join(settings.CONF_DIR, 'storage-aggregation.conf') STORAGE_LISTS_DIR = join(settings.CONF_DIR, 'lists') class Schema: def test(self, metric): raise NotImplementedError() def matches(self, metric): return bool(self.test(metric)) class DefaultSchema(Schema): def __init__(self, name, archives): self.name = name self.archives = archives def test(self, metric): return True class PatternSchema(Schema): def __init__(self, name, pattern, archives): self.name = name self.pattern = pattern self.regex = re.compile(pattern) self.archives = archives def test(self, metric): return self.regex.search(metric) class Archive: def __init__(self, secondsPerPoint, points): self.secondsPerPoint = int(secondsPerPoint) self.points = int(points) def __str__(self): return "Archive = (Seconds per point: %d, Datapoints to save: %d)" % (self.secondsPerPoint, self.points) def getTuple(self): return (self.secondsPerPoint, self.points) @staticmethod def fromString(retentionDef): (secondsPerPoint, points) = parseRetentionDef(retentionDef) return Archive(secondsPerPoint, points) def loadStorageSchemas(): schemaList = [] config = OrderedConfigParser() config.read(STORAGE_SCHEMAS_CONFIG) for section in config.sections(): options = dict(config.items(section)) pattern = options.get('pattern') try: retentions = options['retentions'].split(',') archives = [Archive.fromString(s) for s in retentions] except KeyError: log.err("Schema %s missing 'retentions', skipping" % section) continue if pattern: mySchema = PatternSchema(section, pattern, archives) else: log.err("Schema %s missing 'pattern', skipping" % section) continue archiveList = [a.getTuple() for a in archives] try: if state.database is not None: state.database.validateArchiveList(archiveList) schemaList.append(mySchema) except ValueError, e: log.msg("Invalid schemas found in %s: %s" % (section, e)) schemaList.append(defaultSchema) return schemaList def loadAggregationSchemas(): # NOTE: This abuses 
the Schema classes above, and should probably be refactored. schemaList = [] config = OrderedConfigParser() try: config.read(STORAGE_AGGREGATION_CONFIG) except (IOError, CarbonConfigException): log.msg("%s not found or wrong perms, ignoring." % STORAGE_AGGREGATION_CONFIG) for section in config.sections(): options = dict(config.items(section)) pattern = options.get('pattern') xFilesFactor = options.get('xfilesfactor') aggregationMethod = options.get('aggregationmethod') try: if xFilesFactor is not None: xFilesFactor = float(xFilesFactor) assert 0 <= xFilesFactor <= 1 if aggregationMethod is not None: if state.database is not None: assert aggregationMethod in state.database.aggregationMethods except ValueError: log.msg("Invalid schemas found in %s." % section) continue archives = (xFilesFactor, aggregationMethod) if pattern: mySchema = PatternSchema(section, pattern, archives) else: log.err("Section missing 'pattern': %s" % section) continue schemaList.append(mySchema) schemaList.append(defaultAggregation) return schemaList defaultArchive = Archive(60, 60 * 24 * 7) # default retention for unclassified data (7 days of minutely data) defaultSchema = DefaultSchema('default', [defaultArchive]) defaultAggregation = DefaultSchema('default', (None, None)) carbon-1.0.2/lib/carbon/manhole.py0000644000000000000000000000362513131244450016726 0ustar rootroot00000000000000from twisted.cred import portal, checkers from twisted.conch.ssh import keys from twisted.conch.checkers import SSHPublicKeyDatabase from twisted.conch.manhole import Manhole from twisted.conch.manhole_ssh import TerminalRealm, ConchFactory from twisted.internet import reactor from twisted.application.internet import TCPServer from carbon.protocols import CarbonServerProtocol from carbon.conf import settings namespace = {} class PublicKeyChecker(SSHPublicKeyDatabase): def __init__(self, userKeys): self.userKeys = {} for username, keyData in userKeys.items(): self.userKeys[username] = keys.Key.fromString(data=keyData).blob() def checkKey(self, credentials): if credentials.username in self.userKeys: keyBlob = self.userKeys[credentials.username] return keyBlob == credentials.blob def createManholeListener(): sshRealm = TerminalRealm() sshRealm.chainedProtocolFactory.protocolFactory = lambda _: Manhole(namespace) if settings.MANHOLE_PUBLIC_KEY == 'None': credChecker = checkers.InMemoryUsernamePasswordDatabaseDontUse() credChecker.addUser(settings.MANHOLE_USER, '') else: userKeys = { settings.MANHOLE_USER: settings.MANHOLE_PUBLIC_KEY, } credChecker = PublicKeyChecker(userKeys) sshPortal = portal.Portal(sshRealm) sshPortal.registerChecker(credChecker) sessionFactory = ConchFactory(sshPortal) return sessionFactory def start(): sessionFactory = createManholeListener() reactor.listenTCP(settings.MANHOLE_PORT, sessionFactory, interface=settings.MANHOLE_INTERFACE) class ManholeProtocol(CarbonServerProtocol): plugin_name = "manhole" @classmethod def build(cls, root_service): if not settings.ENABLE_MANHOLE: return factory = createManholeListener() service = TCPServer( settings.MANHOLE_PORT, factory, interface=settings.MANHOLE_INTERFACE) service.setServiceParent(root_service) carbon-1.0.2/lib/carbon/rewrite.py0000644000000000000000000000470013131244450016757 0ustar rootroot00000000000000import re from collections import defaultdict from os.path import exists, getmtime from twisted.internet.task import LoopingCall from carbon.pipeline import Processor from carbon import log # rulesets PRE = 'pre' POST = 'post' class RewriteProcessor(Processor): 
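  # RewriteRuleManager (defined below) reads rewrite-rules.conf, where [pre]
  # and [post] section headings pick the ruleset and each "pattern = replacement"
  # line becomes a RewriteRule applied with regex substitution.  A hypothetical
  # fragment such as
  #   [pre]
  #   ^collectd\. = hosts.
  # would rewrite 'collectd.web1.load' to 'hosts.web1.load' before aggregation
  # rules are applied (see AggregationProcessor.process above).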
plugin_name = 'rewrite' def __init__(self, ruleset): self.ruleset = ruleset def process(self, metric, datapoint): for rule in RewriteRuleManager.rules(self.ruleset): metric = rule.apply(metric) yield (metric, datapoint) class RewriteRuleManager: def __init__(self): self.rulesets = defaultdict(list) self.rules_file = None self.read_task = LoopingCall(self.read_rules) self.rules_last_read = 0.0 def clear(self, ruleset=None): if ruleset: self.rulesets[ruleset] = [] else: self.rulesets.clear() def rules(self, ruleset): return self.rulesets[ruleset] def read_from(self, rules_file): self.rules_file = rules_file self.read_rules() if not self.read_task.running: self.read_task.start(10, now=False) def read_rules(self): if not exists(self.rules_file): self.clear() return # Only read if the rules file has been modified try: mtime = getmtime(self.rules_file) except (OSError, IOError): log.err("Failed to get mtime of %s" % self.rules_file) return if mtime <= self.rules_last_read: return section = None for line in open(self.rules_file): line = line.strip() if line.startswith('#') or not line: continue if line.startswith('[') and line.endswith(']'): section = line[1:-1].lower() self.clear(section) elif '=' in line: pattern, replacement = line.split('=', 1) pattern, replacement = pattern.strip(), replacement.strip() try: rule = RewriteRule(pattern, replacement) except re.error: log.err("Invalid regular expression in rewrite rule: '{0}'".format(pattern)) continue self.rulesets[section].append(rule) else: log.err("Invalid syntax: not a section heading or rule: '{0}'".format(line)) self.rules_last_read = mtime class RewriteRule: def __init__(self, pattern, replacement): self.pattern = pattern self.replacement = replacement self.regex = re.compile(pattern) def apply(self, metric): return self.regex.sub(self.replacement, metric) # Ghetto singleton RewriteRuleManager = RewriteRuleManager() carbon-1.0.2/lib/carbon/conf.py0000644000000000000000000005164413131244455016241 0ustar rootroot00000000000000"""Copyright 2009 Chris Davis Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
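# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the carbon distribution): the rewrite rules
# consumed by RewriteRuleManager above live in an INI-style file where each
# [section] names a ruleset (e.g. "pre" or "post") and every non-comment line
# is "regex = replacement". Applying a rule is a plain re.sub(), exactly as in
# RewriteRule.apply(). The pattern and metric name below are hypothetical.
import re

pattern, replacement = r'^servers\.(\w+)\.cpu', r'hosts.\1.cpu'
rule = re.compile(pattern)
print(rule.sub(replacement, 'servers.web01.cpu'))  # -> hosts.web01.cpu
# ---------------------------------------------------------------------------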
See the License for the specific language governing permissions and limitations under the License.""" import os import sys import pwd import errno from os.path import join, dirname, normpath, exists, isdir from optparse import OptionParser from ConfigParser import ConfigParser from carbon import log, state from carbon.database import TimeSeriesDatabase from carbon.routers import DatapointRouter from carbon.exceptions import CarbonConfigException from twisted.python import usage defaults = dict( USER="", MAX_CACHE_SIZE=float('inf'), MAX_UPDATES_PER_SECOND=500, MAX_CREATES_PER_MINUTE=float('inf'), MIN_TIMESTAMP_RESOLUTION=0, LINE_RECEIVER_INTERFACE='0.0.0.0', LINE_RECEIVER_PORT=2003, ENABLE_UDP_LISTENER=False, UDP_RECEIVER_INTERFACE='0.0.0.0', UDP_RECEIVER_PORT=2003, PICKLE_RECEIVER_INTERFACE='0.0.0.0', PICKLE_RECEIVER_PORT=2004, MAX_RECEIVER_CONNECTIONS=float('inf'), CACHE_QUERY_INTERFACE='0.0.0.0', CACHE_QUERY_PORT=7002, LOG_UPDATES=True, LOG_CREATES=True, LOG_CACHE_HITS=True, LOG_CACHE_QUEUE_SORTS=True, DATABASE='whisper', WHISPER_AUTOFLUSH=False, WHISPER_SPARSE_CREATE=False, WHISPER_FALLOCATE_CREATE=False, WHISPER_LOCK_WRITES=False, WHISPER_FADVISE_RANDOM=False, CERES_MAX_SLICE_GAP=80, CERES_NODE_CACHING_BEHAVIOR='all', CERES_SLICE_CACHING_BEHAVIOR='latest', CERES_LOCK_WRITES=False, MAX_DATAPOINTS_PER_MESSAGE=500, MAX_AGGREGATION_INTERVALS=5, FORWARD_ALL=True, MAX_QUEUE_SIZE=1000, QUEUE_LOW_WATERMARK_PCT=0.8, TIME_TO_DEFER_SENDING=0.0001, ENABLE_AMQP=False, AMQP_METRIC_NAME_IN_BODY=False, AMQP_VERBOSE=False, AMQP_SPEC=None, BIND_PATTERNS=['#'], ENABLE_MANHOLE=False, MANHOLE_INTERFACE='127.0.0.1', MANHOLE_PORT=7222, MANHOLE_USER="", MANHOLE_PUBLIC_KEY="", RELAY_METHOD='rules', REPLICATION_FACTOR=1, DIVERSE_REPLICAS=True, DESTINATIONS=[], DESTINATION_PROTOCOL="pickle", USE_FLOW_CONTROL=True, USE_INSECURE_UNPICKLER=False, USE_WHITELIST=False, CARBON_METRIC_PREFIX='carbon', CARBON_METRIC_INTERVAL=60, CACHE_WRITE_STRATEGY='sorted', WRITE_BACK_FREQUENCY=None, MIN_RESET_STAT_FLOW=1000, MIN_RESET_RATIO=0.9, MIN_RESET_INTERVAL=121, USE_RATIO_RESET=False, LOG_LISTENER_CONN_SUCCESS=True, LOG_AGGREGATOR_MISSES=True, AGGREGATION_RULES='aggregation-rules.conf', REWRITE_RULES='rewrite-rules.conf', RELAY_RULES='relay-rules.conf', ENABLE_LOGROTATION=True, METRIC_CLIENT_IDLE_TIMEOUT=None, ) def _process_alive(pid): if exists("/proc"): return exists("/proc/%d" % pid) else: try: os.kill(int(pid), 0) return True except OSError, err: return err.errno == errno.EPERM class OrderedConfigParser(ConfigParser): """Hacky workaround to ensure sections are always returned in the order they are defined in. 
Note that this does *not* make any guarantees about the order of options within a section or the order in which sections get written back to disk on write().""" _ordered_sections = [] def read(self, path): # Verifies a file exists *and* is readable if not os.access(path, os.R_OK): raise CarbonConfigException("Error: Missing config file or wrong perms on %s" % path) result = ConfigParser.read(self, path) sections = [] with open(path) as f: for line in f: line = line.strip() if line.startswith('[') and line.endswith(']'): sections.append(line[1:-1]) self._ordered_sections = sections return result def sections(self): return list(self._ordered_sections) # return a copy for safety class Settings(dict): __getattr__ = dict.__getitem__ def __init__(self): dict.__init__(self) self.update(defaults) def readFrom(self, path, section): parser = ConfigParser() if not parser.read(path): raise CarbonConfigException("Failed to read config file %s" % path) if not parser.has_section(section): return for key, value in parser.items(section): key = key.upper() # Detect type from defaults dict if key in defaults: valueType = type(defaults[key]) else: valueType = str if valueType is list: value = [v.strip() for v in value.split(',')] elif valueType is bool: value = parser.getboolean(section, key) else: # Attempt to figure out numeric types automatically try: value = int(value) except ValueError: try: value = float(value) except ValueError: pass self[key] = value settings = Settings() settings.update(defaults) class CarbonCacheOptions(usage.Options): optFlags = [ ["debug", "", "Run in debug mode."], ] optParameters = [ ["config", "c", None, "Use the given config file."], ["instance", "", "a", "Manage a specific carbon instance."], ["logdir", "", None, "Write logs to the given directory."], ["whitelist", "", None, "List of metric patterns to allow."], ["blacklist", "", None, "List of metric patterns to disallow."], ] def postOptions(self): global settings program = self.parent.subCommand # Use provided pidfile (if any) as default for configuration. If it's # set to 'twistd.pid', that means no value was provided and the default # was used. pidfile = self.parent["pidfile"] if pidfile.endswith("twistd.pid"): pidfile = None self["pidfile"] = pidfile # Enforce a default umask of '022' if none was set. if not self.parent.has_key("umask") or self.parent["umask"] is None: self.parent["umask"] = 022 # Read extra settings from the configuration file. program_settings = read_config(program, self) settings.update(program_settings) settings["program"] = program # Normalize and expand paths settings["STORAGE_DIR"] = os.path.normpath(os.path.expanduser(settings["STORAGE_DIR"])) settings["LOCAL_DATA_DIR"] = os.path.normpath(os.path.expanduser(settings["LOCAL_DATA_DIR"])) settings["WHITELISTS_DIR"] = os.path.normpath(os.path.expanduser(settings["WHITELISTS_DIR"])) settings["PID_DIR"] = os.path.normpath(os.path.expanduser(settings["PID_DIR"])) settings["LOG_DIR"] = os.path.normpath(os.path.expanduser(settings["LOG_DIR"])) settings["pidfile"] = os.path.normpath(os.path.expanduser(settings["pidfile"])) # Set process uid/gid by changing the parent config, if a user was # provided in the configuration file. if settings.USER: self.parent["uid"], self.parent["gid"] = ( pwd.getpwnam(settings.USER)[2:4]) # Set the pidfile in parent config to the value that was computed by # C{read_config}. 
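# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the carbon distribution): Settings.readFrom
# above coerces each raw config string based on the *type* of the matching
# entry in `defaults` -- comma-split for lists, ConfigParser.getboolean for
# bools (approximated here), and a best-effort int/float conversion otherwise.
# The sample values below are made up.
def coerce(value, default):
  if isinstance(default, list):
    return [v.strip() for v in value.split(',')]
  if isinstance(default, bool):
    # rough stand-in for parser.getboolean()
    return value.strip().lower() in ('true', '1', 'yes', 'on')
  try:
    return int(value)
  except ValueError:
    try:
      return float(value)
    except ValueError:
      return value

print(coerce('2003', 0))             # -> 2003 (int)
print(coerce('0.8', 0.0))            # -> 0.8 (float)
print(coerce('#, carbon.#', ['#']))  # -> ['#', 'carbon.#']
# ---------------------------------------------------------------------------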
self.parent["pidfile"] = settings["pidfile"] storage_schemas = join(settings["CONF_DIR"], "storage-schemas.conf") if not exists(storage_schemas): print "Error: missing required config %s" % storage_schemas sys.exit(1) if settings.CACHE_WRITE_STRATEGY not in ('timesorted', 'sorted', 'max', 'naive'): log.err("%s is not a valid value for CACHE_WRITE_STRATEGY, defaulting to %s" % (settings.CACHE_WRITE_STRATEGY, defaults['CACHE_WRITE_STRATEGY'])) else: log.msg("Using %s write strategy for cache" % settings.CACHE_WRITE_STRATEGY) # Database-specific settings database = settings.DATABASE if database not in TimeSeriesDatabase.plugins: print "No database plugin implemented for '%s'" % database raise SystemExit(1) database_class = TimeSeriesDatabase.plugins[database] state.database = database_class(settings) settings.CACHE_SIZE_LOW_WATERMARK = settings.MAX_CACHE_SIZE * 0.95 if not "action" in self: self["action"] = "start" self.handleAction() # If we are not running in debug mode or non-daemon mode, then log to a # directory, otherwise log output will go to stdout. If parent options # are set to log to syslog, then use that instead. if not self["debug"]: if self.parent.get("syslog", None): prefix = "%s-%s[%d]" % (program, self["instance"], os.getpid()) log.logToSyslog(prefix) elif not self.parent["nodaemon"]: logdir = settings.LOG_DIR if not isdir(logdir): os.makedirs(logdir) if settings.USER: # We have not yet switched to the specified user, # but that user must be able to create files in this # directory. os.chown(logdir, self.parent["uid"], self.parent["gid"]) log.logToDir(logdir) if self["whitelist"] is None: self["whitelist"] = join(settings["CONF_DIR"], "whitelist.conf") settings["whitelist"] = self["whitelist"] if self["blacklist"] is None: self["blacklist"] = join(settings["CONF_DIR"], "blacklist.conf") settings["blacklist"] = self["blacklist"] def parseArgs(self, *action): """If an action was provided, store it for further processing.""" if len(action) == 1: self["action"] = action[0] def handleAction(self): """Handle extra argument for backwards-compatibility. * C{start} will simply do minimal pid checking and otherwise let twistd take over. * C{stop} will kill an existing running process if it matches the C{pidfile} contents. * C{status} will simply report if the process is up or not. 
""" action = self["action"] pidfile = self.parent["pidfile"] program = settings["program"] instance = self["instance"] if action == "stop": if not exists(pidfile): print "Pidfile %s does not exist" % pidfile raise SystemExit(0) pf = open(pidfile, 'r') try: pid = int(pf.read().strip()) pf.close() except ValueError: print "Failed to parse pid from pidfile %s" % pidfile raise SystemExit(1) except IOError: print "Could not read pidfile %s" % pidfile raise SystemExit(1) print "Sending kill signal to pid %d" % pid try: os.kill(pid, 15) except OSError, e: if e.errno == errno.ESRCH: print "No process with pid %d running" % pid else: raise raise SystemExit(0) elif action == "status": if not exists(pidfile): print "%s (instance %s) is not running" % (program, instance) raise SystemExit(1) pf = open(pidfile, "r") try: pid = int(pf.read().strip()) pf.close() except ValueError: print "Failed to parse pid from pidfile %s" % pidfile raise SystemExit(1) except IOError: print "Failed to read pid from %s" % pidfile raise SystemExit(1) if _process_alive(pid): print ("%s (instance %s) is running with pid %d" % (program, instance, pid)) raise SystemExit(0) else: print "%s (instance %s) is not running" % (program, instance) raise SystemExit(1) elif action == "start": if exists(pidfile): pf = open(pidfile, 'r') try: pid = int(pf.read().strip()) pf.close() except ValueError: print "Failed to parse pid from pidfile %s" % pidfile raise SystemExit(1) except IOError: print "Could not read pidfile %s" % pidfile raise SystemExit(1) if _process_alive(pid): print ("%s (instance %s) is already running with pid %d" % (program, instance, pid)) raise SystemExit(1) else: print "Removing stale pidfile %s" % pidfile try: os.unlink(pidfile) except IOError: print "Could not remove pidfile %s" % pidfile # Try to create the PID directory else: if not os.path.exists(settings["PID_DIR"]): try: os.makedirs(settings["PID_DIR"]) except OSError as exc: # Python >2.5 if exc.errno == errno.EEXIST and os.path.isdir(settings["PID_DIR"]): pass else: raise print "Starting %s (instance %s)" % (program, instance) else: print "Invalid action '%s'" % action print "Valid actions: start stop status" raise SystemExit(1) class CarbonAggregatorOptions(CarbonCacheOptions): optParameters = [ ["rules", "", None, "Use the given aggregation rules file."], ["rewrite-rules", "", None, "Use the given rewrite rules file."], ] + CarbonCacheOptions.optParameters def postOptions(self): CarbonCacheOptions.postOptions(self) if self["rules"] is None: self["rules"] = join(settings["CONF_DIR"], settings['AGGREGATION_RULES']) settings["aggregation-rules"] = self["rules"] if self["rewrite-rules"] is None: self["rewrite-rules"] = join(settings["CONF_DIR"], settings['REWRITE_RULES']) settings["rewrite-rules"] = self["rewrite-rules"] class CarbonRelayOptions(CarbonCacheOptions): optParameters = [ ["rules", "", None, "Use the given relay rules file."], ["aggregation-rules", "", None, "Use the given aggregation rules file."], ] + CarbonCacheOptions.optParameters def postOptions(self): CarbonCacheOptions.postOptions(self) if self["rules"] is None: self["rules"] = join(settings["CONF_DIR"], settings['RELAY_RULES']) settings["relay-rules"] = self["rules"] if self["aggregation-rules"] is None: self["aggregation-rules"] = join(settings["CONF_DIR"], settings['AGGREGATION_RULES']) settings["aggregation-rules"] = self["aggregation-rules"] router = settings["RELAY_METHOD"] if router not in DatapointRouter.plugins: print ("In carbon.conf, RELAY_METHOD must be one of %s. 
" "Invalid value: '%s'" % (', '.join(DatapointRouter.plugins), router)) raise SystemExit(1) def get_default_parser(usage="%prog [options] "): """Create a parser for command line options.""" parser = OptionParser(usage=usage) parser.add_option( "--debug", action="store_true", help="Run in the foreground, log to stdout") parser.add_option( "--syslog", action="store_true", help="Write logs to syslog") parser.add_option( "--nodaemon", action="store_true", help="Run in the foreground") parser.add_option( "--profile", help="Record performance profile data to the given file") parser.add_option( "--profiler", help="Specify the profiler to use") parser.add_option( "--pidfile", default=None, help="Write pid to the given file") parser.add_option( "--umask", default=None, help="Use the given umask when creating files") parser.add_option( "--config", default=None, help="Use the given config file") parser.add_option( "--whitelist", default=None, help="Use the given whitelist file") parser.add_option( "--blacklist", default=None, help="Use the given blacklist file") parser.add_option( "--logdir", default=None, help="Write logs in the given directory") parser.add_option( "--instance", default='a', help="Manage a specific carbon instance") return parser def get_parser(name): parser = get_default_parser() if name == "carbon-aggregator": parser.add_option( "--rules", default=None, help="Use the given aggregation rules file.") parser.add_option( "--rewrite-rules", default=None, help="Use the given rewrite rules file.") elif name == "carbon-relay": parser.add_option( "--rules", default=None, help="Use the given relay rules file.") return parser def parse_options(parser, args): """ Parse command line options and print usage message if no arguments were provided for the command. """ (options, args) = parser.parse_args(args) if not args: parser.print_usage() raise SystemExit(1) if args[0] not in ("start", "stop", "status"): parser.print_usage() raise SystemExit(1) return options, args def read_config(program, options, **kwargs): """ Read settings for 'program' from configuration file specified by 'options["config"]', with missing values provided by 'defaults'. """ settings = Settings() settings.update(defaults) # Initialize default values if not set yet. for name, value in kwargs.items(): settings.setdefault(name, value) graphite_root = kwargs.get("ROOT_DIR") if graphite_root is None: graphite_root = os.environ.get('GRAPHITE_ROOT') if graphite_root is None: raise CarbonConfigException("Either ROOT_DIR or GRAPHITE_ROOT " "needs to be provided.") # Default config directory to root-relative, unless overriden by the # 'GRAPHITE_CONF_DIR' environment variable. settings.setdefault("CONF_DIR", os.environ.get("GRAPHITE_CONF_DIR", join(graphite_root, "conf"))) if options["config"] is None: options["config"] = join(settings["CONF_DIR"], "carbon.conf") else: # Set 'CONF_DIR' to the parent directory of the 'carbon.conf' config # file. settings["CONF_DIR"] = dirname(normpath(options["config"])) # Storage directory can be overriden by the 'GRAPHITE_STORAGE_DIR' # environment variable. It defaults to a path relative to GRAPHITE_ROOT # for backwards compatibility though. settings.setdefault("STORAGE_DIR", os.environ.get("GRAPHITE_STORAGE_DIR", join(graphite_root, "storage"))) def update_STORAGE_DIR_deps(): # By default, everything is written to subdirectories of the storage dir. 
settings.setdefault( "PID_DIR", settings["STORAGE_DIR"]) settings.setdefault( "LOG_DIR", join(settings["STORAGE_DIR"], "log", program)) settings.setdefault( "LOCAL_DATA_DIR", join(settings["STORAGE_DIR"], "whisper")) settings.setdefault( "WHITELISTS_DIR", join(settings["STORAGE_DIR"], "lists")) # Read configuration options from program-specific section. section = program[len("carbon-"):] config = options["config"] if not exists(config): raise CarbonConfigException("Error: missing required config %r" % config) settings.readFrom(config, section) settings.setdefault("instance", options["instance"]) update_STORAGE_DIR_deps() # If a specific instance of the program is specified, augment the settings # with the instance-specific settings and provide sane defaults for # optional settings. if options["instance"]: settings.readFrom(config, "%s:%s" % (section, options["instance"])) settings["pidfile"] = ( options["pidfile"] or join(settings["PID_DIR"], "%s-%s.pid" % (program, options["instance"]))) settings["LOG_DIR"] = (options["logdir"] or join(settings["LOG_DIR"], "%s-%s" % (program, options["instance"]))) else: settings["pidfile"] = ( options["pidfile"] or join(settings["PID_DIR"], '%s.pid' % program)) settings["LOG_DIR"] = (options["logdir"] or settings["LOG_DIR"]) update_STORAGE_DIR_deps() return settings carbon-1.0.2/lib/carbon/writer.py0000644000000000000000000001576413131244455016633 0ustar rootroot00000000000000"""Copyright 2009 Chris Davis Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.""" import time from carbon import state from carbon.cache import MetricCache from carbon.storage import loadStorageSchemas, loadAggregationSchemas from carbon.conf import settings from carbon import log, events, instrumentation from carbon.util import TokenBucket from twisted.internet import reactor from twisted.internet.task import LoopingCall from twisted.application.service import Service try: import signal except ImportError: log.msg("Couldn't import signal module") SCHEMAS = loadStorageSchemas() AGGREGATION_SCHEMAS = loadAggregationSchemas() # Inititalize token buckets so that we can enforce rate limits on creates and # updates if the config wants them. CREATE_BUCKET = None UPDATE_BUCKET = None if settings.MAX_CREATES_PER_MINUTE != float('inf'): capacity = settings.MAX_CREATES_PER_MINUTE fill_rate = float(settings.MAX_CREATES_PER_MINUTE) / 60 CREATE_BUCKET = TokenBucket(capacity, fill_rate) if settings.MAX_UPDATES_PER_SECOND != float('inf'): capacity = settings.MAX_UPDATES_PER_SECOND fill_rate = settings.MAX_UPDATES_PER_SECOND UPDATE_BUCKET = TokenBucket(capacity, fill_rate) def optimalWriteOrder(): """Generates metrics with the most cached values first and applies a soft rate limit on new metrics""" cache = MetricCache() while cache: (metric, datapoints) = cache.drain_metric() dbFileExists = state.database.exists(metric) if not dbFileExists and CREATE_BUCKET: # If our tokenbucket has enough tokens available to create a new metric # file then yield the metric data to complete that operation. 
Otherwise # we'll just drop the metric on the ground and move on to the next # metric. # XXX This behavior should probably be configurable to no tdrop metrics # when rate limitng unless our cache is too big or some other legit # reason. if CREATE_BUCKET.drain(1): yield (metric, datapoints, dbFileExists) continue yield (metric, datapoints, dbFileExists) def writeCachedDataPoints(): "Write datapoints until the MetricCache is completely empty" cache = MetricCache() while cache: dataWritten = False for (metric, datapoints, dbFileExists) in optimalWriteOrder(): dataWritten = True if not dbFileExists: archiveConfig = None xFilesFactor, aggregationMethod = None, None for schema in SCHEMAS: if schema.matches(metric): if settings.LOG_CREATES: log.creates('new metric %s matched schema %s' % (metric, schema.name)) archiveConfig = [archive.getTuple() for archive in schema.archives] break for schema in AGGREGATION_SCHEMAS: if schema.matches(metric): if settings.LOG_CREATES: log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name)) xFilesFactor, aggregationMethod = schema.archives break if not archiveConfig: raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric) if settings.LOG_CREATES: log.creates("creating database metric %s (archive=%s xff=%s agg=%s)" % (metric, archiveConfig, xFilesFactor, aggregationMethod)) try: state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod) instrumentation.increment('creates') except Exception, e: log.err() log.msg("Error creating %s: %s" % (metric, e)) instrumentation.increment('errors') continue # If we've got a rate limit configured lets makes sure we enforce it if UPDATE_BUCKET: UPDATE_BUCKET.drain(1, blocking=True) try: t1 = time.time() # If we have duplicated points, always pick the last. update_many() # has no guaranted behavior for that, and in fact the current implementation # will keep the first point in the list. datapoints = dict(datapoints).items() state.database.write(metric, datapoints) updateTime = time.time() - t1 except Exception, e: log.err() log.msg("Error writing to %s: %s" % (metric, e)) instrumentation.increment('errors') else: pointCount = len(datapoints) instrumentation.increment('committedPoints', pointCount) instrumentation.append('updateTimes', updateTime) if settings.LOG_UPDATES: log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime)) # Avoid churning CPU when only new metrics are in the cache if not dataWritten: time.sleep(0.1) def writeForever(): while reactor.running: try: writeCachedDataPoints() except Exception: log.err() time.sleep(0.1) # The writer thread only sleeps when the cache is empty or an error occurs def reloadStorageSchemas(): global SCHEMAS try: SCHEMAS = loadStorageSchemas() except Exception, e: log.msg("Failed to reload storage SCHEMAS: %s" % (e)) def reloadAggregationSchemas(): global AGGREGATION_SCHEMAS try: AGGREGATION_SCHEMAS = loadAggregationSchemas() except Exception, e: log.msg("Failed to reload aggregation SCHEMAS: %s" % (e)) def shutdownModifyUpdateSpeed(): try: shut = settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN if UPDATE_BUCKET: UPDATE_BUCKET.setCapacityAndFillRate(shut,shut) if CREATE_BUCKET: CREATE_BUCKET.setCapacityAndFillRate(shut,shut) log.msg("Carbon shutting down. Changed the update rate to: " + str(settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN)) except KeyError: log.msg("Carbon shutting down. 
Update rate not changed") class WriterService(Service): def __init__(self): self.storage_reload_task = LoopingCall(reloadStorageSchemas) self.aggregation_reload_task = LoopingCall(reloadAggregationSchemas) def startService(self): if 'signal' in globals().keys(): log.msg("Installing SIG_IGN for SIGHUP") signal.signal(signal.SIGHUP, signal.SIG_IGN) self.storage_reload_task.start(60, False) self.aggregation_reload_task.start(60, False) reactor.addSystemEventTrigger('before', 'shutdown', shutdownModifyUpdateSpeed) reactor.callInThread(writeForever) Service.startService(self) def stopService(self): self.storage_reload_task.stop() self.aggregation_reload_task.stop() Service.stopService(self) carbon-1.0.2/lib/carbon/pipeline.py0000644000000000000000000000163513131244455017114 0ustar rootroot00000000000000from carbon.util import PluginRegistrar from carbon import state, log class Processor(object): __metaclass__ = PluginRegistrar plugins = {} NO_OUTPUT = () def pipeline_ready(self): "override me if you want" def process(self, metric, datapoint): raise NotImplemented() def run_pipeline_generated(metric, datapoint): # For generated points, use a special pipeline to avoid points # infinitely being trapped. run_pipeline(metric, datapoint, state.pipeline_processors_generated) def run_pipeline(metric, datapoint, processors=None): if processors is None: processors = state.pipeline_processors elif not processors: return processor = processors[0] try: for out_metric, out_datapoint in processor.process(metric, datapoint): try: run_pipeline(out_metric, out_datapoint, processors[1:]) except Exception: log.err() except Exception: log.err() carbon-1.0.2/lib/carbon/events.py0000644000000000000000000000266213131244450016607 0ustar rootroot00000000000000from twisted.python.failure import Failure class Event: def __init__(self, name): self.name = name self.handlers = [] def addHandler(self, handler): if handler not in self.handlers: self.handlers.append(handler) def removeHandler(self, handler): if handler in self.handlers: self.handlers.remove(handler) def __call__(self, *args, **kwargs): for handler in self.handlers: try: handler(*args, **kwargs) except Exception: log.err(None, "Exception in %s event handler: args=%s kwargs=%s" % (self.name, args, kwargs)) metricReceived = Event('metricReceived') metricGenerated = Event('metricGenerated') cacheFull = Event('cacheFull') cacheSpaceAvailable = Event('cacheSpaceAvailable') pauseReceivingMetrics = Event('pauseReceivingMetrics') resumeReceivingMetrics = Event('resumeReceivingMetrics') # Default handlers metricReceived.addHandler(lambda metric, datapoint: state.instrumentation.increment('metricsReceived')) cacheFull.addHandler(lambda: state.instrumentation.increment('cache.overflow')) cacheFull.addHandler(lambda: setattr(state, 'cacheTooFull', True)) cacheSpaceAvailable.addHandler(lambda: setattr(state, 'cacheTooFull', False)) pauseReceivingMetrics.addHandler(lambda: setattr(state, 'metricReceiversPaused', True)) resumeReceivingMetrics.addHandler(lambda: setattr(state, 'metricReceiversPaused', False)) # Avoid import circularities from carbon import log, state carbon-1.0.2/lib/carbon/amqp_listener.py0000644000000000000000000002102313131244455020143 0ustar rootroot00000000000000#!/usr/bin/env python """ Copyright 2009 Lucio Torre Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
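# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the carbon distribution): a pipeline step is
# simply a Processor subclass whose process() yields zero or more
# (metric, datapoint) pairs; run_pipeline() above hands every yielded pair to
# the remaining processors in the chain. The plugin below is hypothetical and
# assumes carbon's "lib" directory (and its Twisted dependency) is importable.
from carbon.pipeline import Processor

class PrefixProcessor(Processor):
  plugin_name = 'prefix-example'   # hypothetical plugin name

  def process(self, metric, datapoint):
    # Pass the (renamed) point on to the next processor in the chain.
    yield ('example.' + metric, datapoint)
# ---------------------------------------------------------------------------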
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. This is an AMQP client that will connect to the specified broker and read messages, parse them, and post them as metrics. Each message's routing key should be a metric name. The message body should be one or more lines of the form: \n \n ... Where each is a real number and is a UNIX epoch time. This program can be started standalone for testing or using carbon-cache.py (see example config file provided) """ import sys import os import socket from optparse import OptionParser from twisted.internet.defer import inlineCallbacks from twisted.internet import reactor from twisted.internet.protocol import ReconnectingClientFactory from twisted.application.internet import TCPClient from txamqp.protocol import AMQClient from txamqp.client import TwistedDelegate import txamqp.spec try: import carbon except ImportError: # this is being run directly, carbon is not installed LIB_DIR = os.path.dirname(os.path.dirname(__file__)) sys.path.insert(0, LIB_DIR) import carbon.protocols #satisfy import order requirements from carbon.protocols import CarbonServerProtocol from carbon.conf import settings from carbon import log, events, instrumentation HOSTNAME = socket.gethostname().split('.')[0] class AMQPProtocol(CarbonServerProtocol): plugin_name = "amqp" @classmethod def build(cls, root_service): if not settings.ENABLE_AMQP: return amqp_host = settings.AMQP_HOST amqp_port = settings.AMQP_PORT amqp_user = settings.AMQP_USER amqp_password = settings.AMQP_PASSWORD amqp_verbose = settings.AMQP_VERBOSE amqp_vhost = settings.AMQP_VHOST amqp_spec = settings.AMQP_SPEC amqp_exchange_name = settings.AMQP_EXCHANGE factory = createAMQPListener( amqp_user, amqp_password, vhost=amqp_vhost, spec=amqp_spec, exchange_name=amqp_exchange_name, verbose=amqp_verbose) service = TCPClient(amqp_host, amqp_port, factory) service.setServiceParent(root_service) class AMQPGraphiteProtocol(AMQClient): """This is the protocol instance that will receive and post metrics.""" consumer_tag = "graphite_consumer" @inlineCallbacks def connectionMade(self): yield AMQClient.connectionMade(self) log.listener("New AMQP connection made") yield self.setup() yield self.receive_loop() @inlineCallbacks def setup(self): exchange = self.factory.exchange_name yield self.authenticate(self.factory.username, self.factory.password) chan = yield self.channel(1) yield chan.channel_open() # declare the exchange and queue yield chan.exchange_declare(exchange=exchange, type="topic", durable=True, auto_delete=False) # we use a private queue to avoid conflicting with existing bindings reply = yield chan.queue_declare(exclusive=True) my_queue = reply.queue # bind each configured metric pattern for bind_pattern in settings.BIND_PATTERNS: log.listener("binding exchange '%s' to queue '%s' with pattern %s" \ % (exchange, my_queue, bind_pattern)) yield chan.queue_bind(exchange=exchange, queue=my_queue, routing_key=bind_pattern) yield chan.basic_consume(queue=my_queue, no_ack=True, consumer_tag=self.consumer_tag) @inlineCallbacks def receive_loop(self): queue = yield self.queue(self.consumer_tag) while True: msg = yield queue.get() self.processMessage(msg) def processMessage(self, message): 
"""Parse a message and post it as a metric.""" if self.factory.verbose: log.listener("Message received: %s" % (message,)) metric = message.routing_key for line in message.content.body.split("\n"): line = line.strip() if not line: continue try: if settings.get("AMQP_METRIC_NAME_IN_BODY", False): metric, value, timestamp = line.split() else: value, timestamp = line.split() datapoint = ( float(timestamp), float(value) ) if datapoint[1] != datapoint[1]: # filter out NaN values continue except ValueError: log.listener("invalid message line: %s" % (line,)) continue events.metricReceived(metric, datapoint) if self.factory.verbose: log.listener("Metric posted: %s %s %s" % (metric, value, timestamp,)) class AMQPReconnectingFactory(ReconnectingClientFactory): """The reconnecting factory. Knows how to create the extended client and how to keep trying to connect in case of errors.""" protocol = AMQPGraphiteProtocol def __init__(self, username, password, delegate, vhost, spec, channel, exchange_name, verbose): self.username = username self.password = password self.delegate = delegate self.vhost = vhost self.spec = spec self.channel = channel self.exchange_name = exchange_name self.verbose = verbose def buildProtocol(self, addr): self.resetDelay() p = self.protocol(self.delegate, self.vhost, self.spec) p.factory = self return p def createAMQPListener(username, password, vhost, exchange_name, spec=None, channel=1, verbose=False): """ Create an C{AMQPReconnectingFactory} configured with the specified options. """ # use provided spec if not specified if not spec: spec = txamqp.spec.load(os.path.normpath( os.path.join(os.path.dirname(__file__), 'amqp0-8.xml'))) delegate = TwistedDelegate() factory = AMQPReconnectingFactory(username, password, delegate, vhost, spec, channel, exchange_name, verbose=verbose) return factory def startReceiver(host, port, username, password, vhost, exchange_name, spec=None, channel=1, verbose=False): """ Starts a twisted process that will read messages on the amqp broker and post them as metrics. """ factory = createAMQPListener(username, password, vhost, exchange_name, spec=spec, channel=channel, verbose=verbose) reactor.connectTCP(host, port, factory) def main(): parser = OptionParser() parser.add_option("-t", "--host", dest="host", help="host name", metavar="HOST", default="localhost") parser.add_option("-p", "--port", dest="port", type=int, help="port number", metavar="PORT", default=5672) parser.add_option("-u", "--user", dest="username", help="username", metavar="USERNAME", default="guest") parser.add_option("-w", "--password", dest="password", help="password", metavar="PASSWORD", default="guest") parser.add_option("-V", "--vhost", dest="vhost", help="vhost", metavar="VHOST", default="/") parser.add_option("-e", "--exchange", dest="exchange", help="exchange", metavar="EXCHANGE", default="graphite") parser.add_option("-v", "--verbose", dest="verbose", help="verbose", default=False, action="store_true") (options, args) = parser.parse_args() startReceiver(options.host, options.port, options.username, options.password, vhost=options.vhost, exchange_name=options.exchange, verbose=options.verbose) reactor.run() if __name__ == "__main__": main() carbon-1.0.2/lib/carbon/carbon_pb2.py0000644000000000000000000001122513131244450017305 0ustar rootroot00000000000000# Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: carbon.proto import sys _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database from google.protobuf import descriptor_pb2 # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() DESCRIPTOR = _descriptor.FileDescriptor( name='carbon.proto', package='carbon', syntax='proto3', serialized_pb=_b('\n\x0c\x63\x61rbon.proto\x12\x06\x63\x61rbon\")\n\x05Point\x12\x11\n\ttimestamp\x18\x01 \x01(\r\x12\r\n\x05value\x18\x02 \x01(\x01\"7\n\x06Metric\x12\x0e\n\x06metric\x18\x01 \x01(\t\x12\x1d\n\x06points\x18\x02 \x03(\x0b\x32\r.carbon.Point\"*\n\x07Payload\x12\x1f\n\x07metrics\x18\x01 \x03(\x0b\x32\x0e.carbon.Metricb\x06proto3') ) _sym_db.RegisterFileDescriptor(DESCRIPTOR) _POINT = _descriptor.Descriptor( name='Point', full_name='carbon.Point', filename=None, file=DESCRIPTOR, containing_type=None, fields=[ _descriptor.FieldDescriptor( name='timestamp', full_name='carbon.Point.timestamp', index=0, number=1, type=13, cpp_type=3, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( name='value', full_name='carbon.Point.value', index=1, number=2, type=1, cpp_type=5, label=1, has_default_value=False, default_value=float(0), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), ], extensions=[ ], nested_types=[], enum_types=[ ], options=None, is_extendable=False, syntax='proto3', extension_ranges=[], oneofs=[ ], serialized_start=24, serialized_end=65, ) _METRIC = _descriptor.Descriptor( name='Metric', full_name='carbon.Metric', filename=None, file=DESCRIPTOR, containing_type=None, fields=[ _descriptor.FieldDescriptor( name='metric', full_name='carbon.Metric.metric', index=0, number=1, type=9, cpp_type=9, label=1, has_default_value=False, default_value=_b("").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( name='points', full_name='carbon.Metric.points', index=1, number=2, type=11, cpp_type=10, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), ], extensions=[ ], nested_types=[], enum_types=[ ], options=None, is_extendable=False, syntax='proto3', extension_ranges=[], oneofs=[ ], serialized_start=67, serialized_end=122, ) _PAYLOAD = _descriptor.Descriptor( name='Payload', full_name='carbon.Payload', filename=None, file=DESCRIPTOR, containing_type=None, fields=[ _descriptor.FieldDescriptor( name='metrics', full_name='carbon.Payload.metrics', index=0, number=1, type=11, cpp_type=10, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), ], extensions=[ ], nested_types=[], enum_types=[ ], options=None, is_extendable=False, syntax='proto3', extension_ranges=[], oneofs=[ ], serialized_start=124, serialized_end=166, ) _METRIC.fields_by_name['points'].message_type = _POINT _PAYLOAD.fields_by_name['metrics'].message_type = _METRIC DESCRIPTOR.message_types_by_name['Point'] = _POINT DESCRIPTOR.message_types_by_name['Metric'] = 
_METRIC DESCRIPTOR.message_types_by_name['Payload'] = _PAYLOAD Point = _reflection.GeneratedProtocolMessageType('Point', (_message.Message,), dict( DESCRIPTOR = _POINT, __module__ = 'carbon_pb2' # @@protoc_insertion_point(class_scope:carbon.Point) )) _sym_db.RegisterMessage(Point) Metric = _reflection.GeneratedProtocolMessageType('Metric', (_message.Message,), dict( DESCRIPTOR = _METRIC, __module__ = 'carbon_pb2' # @@protoc_insertion_point(class_scope:carbon.Metric) )) _sym_db.RegisterMessage(Metric) Payload = _reflection.GeneratedProtocolMessageType('Payload', (_message.Message,), dict( DESCRIPTOR = _PAYLOAD, __module__ = 'carbon_pb2' # @@protoc_insertion_point(class_scope:carbon.Payload) )) _sym_db.RegisterMessage(Payload) # @@protoc_insertion_point(module_scope) carbon-1.0.2/lib/carbon/cache.py0000644000000000000000000001576013131244455016356 0ustar rootroot00000000000000"""Copyright 2009 Chris Davis Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.""" import time import threading from operator import itemgetter from random import choice from collections import defaultdict from carbon.conf import settings from carbon import events, log from carbon.pipeline import Processor def by_timestamp((timestamp, value)): # useful sort key function return timestamp class CacheFeedingProcessor(Processor): plugin_name = 'write' def __init__(self, *args, **kwargs): super(Processor, self).__init__(*args, **kwargs) self.cache = MetricCache() def process(self, metric, datapoint): self.cache.store(metric, datapoint) return Processor.NO_OUTPUT class DrainStrategy(object): """Implements the strategy for writing metrics. The strategy chooses what order (if any) metrics will be popped from the backing cache""" def __init__(self, cache): self.cache = cache def choose_item(self): raise NotImplemented class NaiveStrategy(DrainStrategy): """Pop points in an unordered fashion.""" def __init__(self, cache): super(NaiveStrategy, self).__init__(cache) def _generate_queue(): while True: metric_names = self.cache.keys() while metric_names: yield metric_names.pop() self.queue = _generate_queue() def choose_item(self): return self.queue.next() class MaxStrategy(DrainStrategy): """Always pop the metric with the greatest number of points stored. 
This method leads to less variance in pointsPerUpdate but may mean that infrequently or irregularly updated metrics may not be written until shutdown """ def choose_item(self): metric_name, size = max(self.cache.items(), key=lambda x: len(itemgetter(1)(x))) return metric_name class RandomStrategy(DrainStrategy): """Pop points randomly""" def choose_item(self): return choice(self.cache.keys()) class SortedStrategy(DrainStrategy): """ The default strategy which prefers metrics with a greater number of cached points but guarantees every point gets written exactly once during a loop of the cache """ def __init__(self, cache): super(SortedStrategy, self).__init__(cache) def _generate_queue(): while True: t = time.time() metric_counts = sorted(self.cache.counts, key=lambda x: x[1]) size = len(metric_counts) if settings.LOG_CACHE_QUEUE_SORTS and size: log.msg("Sorted %d cache queues in %.6f seconds" % (size, time.time() - t)) while metric_counts: yield itemgetter(0)(metric_counts.pop()) if settings.LOG_CACHE_QUEUE_SORTS and size: log.msg("Queue consumed in %.6f seconds" % (time.time() - t)) self.queue = _generate_queue() def choose_item(self): return self.queue.next() class TimeSortedStrategy(DrainStrategy): """ This strategy prefers metrics wich are lagging behind guarantees every point gets written exactly once during a loop of the cache """ def __init__(self, cache): super(TimeSortedStrategy, self).__init__(cache) def _generate_queue(): while True: t = time.time() metric_lw = sorted(self.cache.watermarks, key=lambda x: x[1], reverse=True) size = len(metric_lw) if settings.LOG_CACHE_QUEUE_SORTS and size: log.msg("Sorted %d cache queues in %.6f seconds" % (size, time.time() - t)) while metric_lw: yield itemgetter(0)(metric_lw.pop()) if settings.LOG_CACHE_QUEUE_SORTS and size: log.msg("Queue consumed in %.6f seconds" % (time.time() - t)) self.queue = _generate_queue() def choose_item(self): return self.queue.next() class _MetricCache(defaultdict): """A Singleton dictionary of metric names and lists of their datapoints""" def __init__(self, strategy=None): self.lock = threading.Lock() self.size = 0 self.strategy = None if strategy: self.strategy = strategy(self) super(_MetricCache, self).__init__(dict) @property def counts(self): return [(metric, len(datapoints)) for (metric, datapoints) in self.items()] @property def watermarks(self): return [(metric, min(datapoints.keys()), max(datapoints.keys())) for (metric, datapoints) in self.items() if datapoints] @property def is_full(self): if settings.MAX_CACHE_SIZE == float('inf'): return False else: return self.size >= settings.MAX_CACHE_SIZE def _check_available_space(self): if state.cacheTooFull and self.size < settings.CACHE_SIZE_LOW_WATERMARK: log.msg("MetricCache below watermark: self.size=%d" % self.size) events.cacheSpaceAvailable() def drain_metric(self): """Returns a metric and it's datapoints in order determined by the `DrainStrategy`_""" if not self: return (None, []) if self.strategy: metric = self.strategy.choose_item() else: # Avoid .keys() as it dumps the whole list metric = self.iterkeys().next() return (metric, self.pop(metric)) def get_datapoints(self, metric): """Return a list of currently cached datapoints sorted by timestamp""" return sorted(self.get(metric, {}).items(), key=by_timestamp) def pop(self, metric): with self.lock: datapoint_index = defaultdict.pop(self, metric) self.size -= len(datapoint_index) self._check_available_space() return sorted(datapoint_index.items(), key=by_timestamp) def store(self, metric, datapoint): 
timestamp, value = datapoint if timestamp not in self[metric]: # Not a duplicate, hence process if cache is not full if self.is_full: log.msg("MetricCache is full: self.size=%d" % self.size) events.cacheFull() else: with self.lock: self.size += 1 self[metric][timestamp] = value else: # Updating a duplicate does not increase the cache size self[metric][timestamp] = value _Cache = None def MetricCache(): global _Cache if _Cache is not None: return _Cache # Initialize a singleton cache instance # TODO: use plugins. write_strategy = None if settings.CACHE_WRITE_STRATEGY == 'naive': write_strategy = NaiveStrategy if settings.CACHE_WRITE_STRATEGY == 'max': write_strategy = MaxStrategy if settings.CACHE_WRITE_STRATEGY == 'sorted': write_strategy = SortedStrategy if settings.CACHE_WRITE_STRATEGY == 'timesorted': write_strategy = TimeSortedStrategy if settings.CACHE_WRITE_STRATEGY == 'random': write_strategy = RandomStrategy _Cache = _MetricCache(write_strategy) return _Cache # Avoid import circularities from carbon import state carbon-1.0.2/lib/carbon/client.py0000644000000000000000000003757413131244455016600 0ustar rootroot00000000000000from collections import deque from time import time from twisted.application.service import Service from twisted.internet import reactor from twisted.internet.defer import Deferred, DeferredList from twisted.internet.protocol import ReconnectingClientFactory from twisted.protocols.basic import LineOnlyReceiver, Int32StringReceiver from carbon.conf import settings from carbon.util import pickle from carbon import instrumentation, log, pipeline, state from carbon.util import PluginRegistrar try: import signal except ImportError: log.debug("Couldn't import signal module") SEND_QUEUE_LOW_WATERMARK = settings.MAX_QUEUE_SIZE * settings.QUEUE_LOW_WATERMARK_PCT class CarbonClientProtocol(object): def connectionMade(self): log.clients("%s::connectionMade" % self) self.paused = False self.connected = True self.transport.registerProducer(self, streaming=True) # Define internal metric names self.lastResetTime = time() self.destinationName = self.factory.destinationName self.queuedUntilReady = 'destinations.%s.queuedUntilReady' % self.destinationName self.sent = 'destinations.%s.sent' % self.destinationName self.batchesSent = 'destinations.%s.batchesSent' % self.destinationName self.slowConnectionReset = 'destinations.%s.slowConnectionReset' % self.destinationName self.factory.connectionMade.callback(self) self.factory.connectionMade = Deferred() self.sendQueued() def connectionLost(self, reason): log.clients("%s::connectionLost %s" % (self, reason.getErrorMessage())) self.connected = False def pauseProducing(self): self.paused = True def resumeProducing(self): self.paused = False self.sendQueued() def stopProducing(self): self.disconnect() def disconnect(self): if self.connected: self.transport.unregisterProducer() self.transport.loseConnection() self.connected = False def sendDatapoint(self, metric, datapoint): self.factory.enqueue(metric, datapoint) self.factory.scheduleSend() def _sendDatapointsNow(self, datapoints): """Implement this function to actually send datapoints.""" raise NotImplementedError() def sendDatapointsNow(self, datapoints): self._sendDatapointsNow(datapoints) instrumentation.increment(self.sent, len(datapoints)) instrumentation.increment(self.batchesSent) self.factory.checkQueue() def sendQueued(self): """This should be the only method that will be used to send stats. 
In order to not hold the event loop and prevent stats from flowing in while we send them out, this will process settings.MAX_DATAPOINTS_PER_MESSAGE stats, send them, and if there are still items in the queue, this will invoke reactor.callLater to schedule another run of sendQueued after a reasonable enough time for the destination to process what it has just received. Given a queue size of one million stats, and using a chained_invocation_delay of 0.0001 seconds, you'd get 1,000 sendQueued() invocations/second max. With a settings.MAX_DATAPOINTS_PER_MESSAGE of 100, the rate of stats being sent could theoretically be as high as 100,000 stats/sec, or 6,000,000 stats/minute. This is probably too high for a typical receiver to handle. In practice this theoretical max shouldn't be reached because network delays should add an extra delay - probably on the order of 10ms per send, so the queue should drain with an order of minutes, which seems more realistic. """ queueSize = self.factory.queueSize if self.paused: instrumentation.max(self.queuedUntilReady, queueSize) return if not self.factory.hasQueuedDatapoints(): return if settings.USE_RATIO_RESET is True: if not self.connectionQualityMonitor(): self.resetConnectionForQualityReasons("Sent: {0}, Received: {1}".format( instrumentation.prior_stats.get(self.sent, 0), instrumentation.prior_stats.get('metricsReceived', 0))) self.sendDatapointsNow(self.factory.takeSomeFromQueue()) if (self.factory.queueFull.called and queueSize < SEND_QUEUE_LOW_WATERMARK): if not self.factory.queueHasSpace.called: self.factory.queueHasSpace.callback(queueSize) if self.factory.hasQueuedDatapoints(): self.factory.scheduleSend() def connectionQualityMonitor(self): """Checks to see if the connection for this factory appears to be delivering stats at a speed close to what we're receiving them at. This is open to other measures of connection quality. Returns a Bool True means that quality is good, OR True means that the total received is less than settings.MIN_RESET_STAT_FLOW False means that quality is bad """ destination_sent = float(instrumentation.prior_stats.get(self.sent, 0)) total_received = float(instrumentation.prior_stats.get('metricsReceived', 0)) instrumentation.increment(self.slowConnectionReset, 0) if total_received < settings.MIN_RESET_STAT_FLOW: return True if (destination_sent / total_received) < settings.MIN_RESET_RATIO: return False else: return True def resetConnectionForQualityReasons(self, reason): """Only re-sets the connection if it's been settings.MIN_RESET_INTERVAL seconds since the last re-set. Reason should be a string containing the quality info that led to a re-set. 
""" if (time() - self.lastResetTime) < float(settings.MIN_RESET_INTERVAL): return else: self.factory.connectedProtocol.disconnect() self.lastResetTime = time() instrumentation.increment(self.slowConnectionReset) log.clients("%s:: resetConnectionForQualityReasons: %s" % (self, reason)) def __str__(self): return 'CarbonClientProtocol(%s:%d:%s)' % (self.factory.destination) __repr__ = __str__ class CarbonClientFactory(object, ReconnectingClientFactory): __metaclass__ = PluginRegistrar plugins = {} maxDelay = 5 def __init__(self, destination): self.destination = destination self.destinationName = ('%s:%d:%s' % destination).replace('.', '_') self.host, self.port, self.carbon_instance = destination self.addr = (self.host, self.port) self.started = False # This factory maintains protocol state across reconnects self.queue = deque() # Change to make this the sole source of metrics to be sent. self.connectedProtocol = None self.queueEmpty = Deferred() self.queueFull = Deferred() self.queueFull.addCallback(self.queueFullCallback) self.queueHasSpace = Deferred() self.queueHasSpace.addCallback(self.queueSpaceCallback) self.connectFailed = Deferred() self.connectionMade = Deferred() self.connectionLost = Deferred() self.deferSendPending = None # Define internal metric names self.attemptedRelays = 'destinations.%s.attemptedRelays' % self.destinationName self.fullQueueDrops = 'destinations.%s.fullQueueDrops' % self.destinationName self.queuedUntilConnected = 'destinations.%s.queuedUntilConnected' % self.destinationName self.relayMaxQueueLength = 'destinations.%s.relayMaxQueueLength' % self.destinationName def clientProtocol(self): raise NotImplementedError() def scheduleSend(self): if self.deferSendPending and self.deferSendPending.active(): return self.deferSendPending = reactor.callLater(settings.TIME_TO_DEFER_SENDING, self.sendQueued) def sendQueued(self): if self.connectedProtocol: self.connectedProtocol.sendQueued() def queueFullCallback(self, result): state.events.cacheFull() log.clients('%s send queue is full (%d datapoints)' % (self, result)) def queueSpaceCallback(self, result): if self.queueFull.called: log.clients('%s send queue has space available' % self.connectedProtocol) self.queueFull = Deferred() self.queueFull.addCallback(self.queueFullCallback) state.events.cacheSpaceAvailable() self.queueHasSpace = Deferred() self.queueHasSpace.addCallback(self.queueSpaceCallback) def buildProtocol(self, addr): self.connectedProtocol = self.clientProtocol() self.connectedProtocol.factory = self return self.connectedProtocol def startConnecting(self): # calling this startFactory yields recursion problems self.started = True self.connector = reactor.connectTCP(self.host, self.port, self) def stopConnecting(self): self.started = False self.stopTrying() if self.connectedProtocol and self.connectedProtocol.connected: return self.connectedProtocol.disconnect() @property def queueSize(self): return len(self.queue) def hasQueuedDatapoints(self): return bool(self.queue) def takeSomeFromQueue(self): """Use self.queue, which is a collections.deque, to pop up to settings.MAX_DATAPOINTS_PER_MESSAGE items from the left of the queue. """ def yield_max_datapoints(): for count in range(settings.MAX_DATAPOINTS_PER_MESSAGE): try: yield self.queue.popleft() except IndexError: raise StopIteration return list(yield_max_datapoints()) def checkQueue(self): """Check if the queue is empty. 
If the queue isn't empty or doesn't exist yet, then this will invoke the callback chain on the self.queryEmpty Deferred chain with the argument 0, and will re-set the queueEmpty callback chain with a new Deferred object. """ if not self.queue: self.queueEmpty.callback(0) self.queueEmpty = Deferred() def enqueue(self, metric, datapoint): self.queue.append((metric, datapoint)) def enqueue_from_left(self, metric, datapoint): self.queue.appendleft((metric, datapoint)) def sendDatapoint(self, metric, datapoint): instrumentation.increment(self.attemptedRelays) instrumentation.max(self.relayMaxQueueLength, self.queueSize) if self.queueSize >= settings.MAX_QUEUE_SIZE: if not self.queueFull.called: self.queueFull.callback(self.queueSize) instrumentation.increment(self.fullQueueDrops) else: self.enqueue(metric, datapoint) if self.connectedProtocol: self.scheduleSend() else: instrumentation.increment(self.queuedUntilConnected) def sendHighPriorityDatapoint(self, metric, datapoint): """The high priority datapoint is one relating to the carbon daemon itself. It puts the datapoint on the left of the deque, ahead of other stats, so that when the carbon-relay, specifically, is overwhelmed its stats are more likely to make it through and expose the issue at hand. In addition, these stats go on the deque even when the max stats capacity has been reached. This relies on not creating the deque with a fixed max size. """ instrumentation.increment(self.attemptedRelays) self.enqueue_from_left(metric, datapoint) if self.connectedProtocol: self.scheduleSend() else: instrumentation.increment(self.queuedUntilConnected) def startedConnecting(self, connector): log.clients("%s::startedConnecting (%s:%d)" % (self, connector.host, connector.port)) def clientConnectionLost(self, connector, reason): ReconnectingClientFactory.clientConnectionLost(self, connector, reason) log.clients("%s::clientConnectionLost (%s:%d) %s" % (self, connector.host, connector.port, reason.getErrorMessage())) self.connectedProtocol = None self.connectionLost.callback(0) self.connectionLost = Deferred() def clientConnectionFailed(self, connector, reason): ReconnectingClientFactory.clientConnectionFailed(self, connector, reason) log.clients("%s::clientConnectionFailed (%s:%d) %s" % (self, connector.host, connector.port, reason.getErrorMessage())) self.connectFailed.callback(dict(connector=connector, reason=reason)) self.connectFailed = Deferred() def disconnect(self): self.queueEmpty.addCallback(lambda result: self.stopConnecting()) readyToStop = DeferredList( [self.connectionLost, self.connectFailed], fireOnOneCallback=True, fireOnOneErrback=True) self.checkQueue() # This can happen if the client is stopped before a connection is ever made if (not readyToStop.called) and (not self.started): readyToStop.callback(None) return readyToStop def __str__(self): return 'CarbonClientFactory(%s:%d:%s)' % self.destination __repr__ = __str__ # Basic clients and associated factories. 
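# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the carbon distribution): the factory above
# keeps its backlog in a plain collections.deque -- takeSomeFromQueue() pops up
# to MAX_DATAPOINTS_PER_MESSAGE items from the left per send, while
# sendHighPriorityDatapoint() jumps the queue with appendleft(). The batch size
# and sample points below are made up.
from collections import deque

MAX_PER_MESSAGE = 3    # stand-in for settings.MAX_DATAPOINTS_PER_MESSAGE
queue = deque(('metric.%d' % i, (1468426800, float(i))) for i in range(5))

def take_some(q, limit):
  batch = []
  while q and len(batch) < limit:
    batch.append(q.popleft())
  return batch

print(take_some(queue, MAX_PER_MESSAGE))   # first three points
print(len(queue))                          # two left for the next send
# ---------------------------------------------------------------------------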
class CarbonPickleClientProtocol(CarbonClientProtocol, Int32StringReceiver): def _sendDatapointsNow(self, datapoints): self.sendString(pickle.dumps(datapoints, protocol=-1)) class CarbonPickleClientFactory(CarbonClientFactory): plugin_name = "pickle" def clientProtocol(self): return CarbonPickleClientProtocol() class CarbonLineClientProtocol(CarbonClientProtocol, LineOnlyReceiver): def _sendDatapointsNow(self, datapoints): for metric, datapoint in datapoints: self.sendLine("%s %s %d" % (metric, datapoint[1], datapoint[0])) class CarbonLineClientFactory(CarbonClientFactory): plugin_name = "line" def clientProtocol(self): return CarbonLineClientProtocol() class CarbonClientManager(Service): def __init__(self, router): self.router = router self.client_factories = {} # { destination : CarbonClientFactory() } def createFactory(self, destination): from carbon.conf import settings factory_name = settings["DESTINATION_PROTOCOL"] factory_class = CarbonClientFactory.plugins.get(factory_name) if not factory_class: print ("In carbon.conf, DESTINATION_PROTOCOL must be one of %s. " "Invalid value: '%s'" % (', '.join(CarbonClientFactory.plugins), factory_name)) raise SystemExit(1) return factory_class(destination) def startService(self): if 'signal' in globals().keys(): log.debug("Installing SIG_IGN for SIGHUP") signal.signal(signal.SIGHUP, signal.SIG_IGN) Service.startService(self) for factory in self.client_factories.values(): if not factory.started: factory.startConnecting() def stopService(self): Service.stopService(self) return self.stopAllClients() def startClient(self, destination): if destination in self.client_factories: return log.clients("connecting to carbon daemon at %s:%d:%s" % destination) self.router.addDestination(destination) factory = self.createFactory(destination) self.client_factories[destination] = factory connectAttempted = DeferredList( [factory.connectionMade, factory.connectFailed], fireOnOneCallback=True, fireOnOneErrback=True) if self.running: factory.startConnecting() # this can trigger & replace connectFailed return connectAttempted def stopClient(self, destination): factory = self.client_factories.get(destination) if factory is None: return self.router.removeDestination(destination) stopCompleted = factory.disconnect() stopCompleted.addCallback(lambda result: self.disconnectClient(destination)) return stopCompleted def disconnectClient(self, destination): factory = self.client_factories.pop(destination) c = factory.connector if c and c.state == 'connecting' and not factory.hasQueuedDatapoints(): c.stopConnecting() def stopAllClients(self): deferreds = [] for destination in list(self.client_factories): deferreds.append(self.stopClient(destination)) return DeferredList(deferreds) def sendDatapoint(self, metric, datapoint): for destination in self.router.getDestinations(metric): self.client_factories[destination].sendDatapoint(metric, datapoint) def sendHighPriorityDatapoint(self, metric, datapoint): for destination in self.router.getDestinations(metric): self.client_factories[destination].sendHighPriorityDatapoint(metric, datapoint) def __str__(self): return "<%s[%x]>" % (self.__class__.__name__, id(self)) class RelayProcessor(pipeline.Processor): plugin_name = 'relay' def process(self, metric, datapoint): state.client_manager.sendDatapoint(metric, datapoint) return pipeline.Processor.NO_OUTPUT carbon-1.0.2/lib/carbon/protobuf.py0000644000000000000000000000320113131244450017131 0ustar rootroot00000000000000from twisted.protocols.basic import Int32StringReceiver from carbon 
import log from carbon.protocols import MetricReceiver from carbon.client import CarbonClientProtocol, CarbonClientFactory from carbon.carbon_pb2 import Payload from google.protobuf.message import DecodeError class MetricProtobufReceiver(MetricReceiver, Int32StringReceiver): plugin_name = "protobuf" MAX_LENGTH = 2 ** 20 def stringReceived(self, data): try: payload_pb = Payload.FromString(data) except DecodeError: log.listener('invalid protobuf received from %s, ignoring' % self.peerName) return for metric_pb in payload_pb.metrics: for point_pb in metric_pb.points: self.metricReceived( metric_pb.metric, (point_pb.timestamp, point_pb.value)) class CarbonProtobufClientProtocol(CarbonClientProtocol, Int32StringReceiver): def _sendDatapointsNow(self, datapoints): metrics = {} payload_pb = Payload() for metric, datapoint in datapoints: if metric not in metrics: metric_pb = payload_pb.metrics.add() metric_pb.metric = metric metrics[metric] = metric_pb else: metric_pb = metrics[metric] point_pb = metric_pb.points.add() point_pb.timestamp = int(datapoint[0]) point_pb.value = datapoint[1] self.sendString(payload_pb.SerializeToString()) class CarbonProtobufClientFactory(CarbonClientFactory): plugin_name = "protobuf" def clientProtocol(self): return CarbonProtobufClientProtocol() carbon-1.0.2/lib/carbon/service.py0000644000000000000000000001423513131244455016747 0ustar rootroot00000000000000"""Copyright 2009 Chris Davis Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.""" from os.path import exists from twisted.application.service import MultiService from twisted.application.internet import TCPServer, TCPClient from twisted.internet.protocol import ServerFactory from twisted.python.components import Componentized from twisted.python.log import ILogObserver # Attaching modules to the global state module simplifies import order hassles from carbon import state, events, instrumentation, util from carbon.exceptions import CarbonConfigException from carbon.log import carbonLogObserver from carbon.pipeline import Processor, run_pipeline, run_pipeline_generated state.events = events state.instrumentation = instrumentation # Import plugins. try: import carbon.manhole except ImportError: pass try: import carbon.amqp_listener except ImportError: pass try: import carbon.protobuf except ImportError, e: pass class CarbonRootService(MultiService): """Root Service that properly configures twistd logging""" def setServiceParent(self, parent): MultiService.setServiceParent(self, parent) if isinstance(parent, Componentized): parent.setComponent(ILogObserver, carbonLogObserver) def createBaseService(config, settings): root_service = CarbonRootService() root_service.setName(settings.program) if settings.USE_WHITELIST: from carbon.regexlist import WhiteList, BlackList WhiteList.read_from(settings.whitelist) BlackList.read_from(settings.blacklist) # Instantiate an instrumentation service that will record metrics about # this service. 
from carbon.instrumentation import InstrumentationService service = InstrumentationService() service.setServiceParent(root_service) return root_service def setupPipeline(pipeline, root_service, settings): state.pipeline_processors = [] for processor in pipeline: args = [] if ':' in processor: processor, arglist = processor.split(':', 1) args = arglist.split(',') if processor == 'aggregate': setupAggregatorProcessor(root_service, settings) elif processor == 'rewrite': setupRewriterProcessor(root_service, settings) elif processor == 'relay': setupRelayProcessor(root_service, settings) elif processor == 'write': setupWriterProcessor(root_service, settings) else: raise ValueError("Invalid pipeline processor '%s'" % processor) plugin_class = Processor.plugins[processor] state.pipeline_processors.append(plugin_class(*args)) if processor == 'relay': state.pipeline_processors_generated.append(plugin_class(*args)) events.metricReceived.addHandler(run_pipeline) events.metricGenerated.addHandler(run_pipeline_generated) def activate_processors(): for processor in state.pipeline_processors: processor.pipeline_ready() from twisted.internet import reactor reactor.callWhenRunning(activate_processors) def createCacheService(config): from carbon.conf import settings root_service = createBaseService(config, settings) setupPipeline(['write'], root_service, settings) setupReceivers(root_service, settings) return root_service def createAggregatorService(config): from carbon.conf import settings settings.RELAY_METHOD = 'consistent-hashing' root_service = createBaseService(config, settings) setupPipeline(['rewrite:pre', 'aggregate', 'rewrite:post', 'relay'], root_service, settings) setupReceivers(root_service, settings) return root_service def createRelayService(config): from carbon.conf import settings root_service = createBaseService(config, settings) setupPipeline(['relay'], root_service, settings) setupReceivers(root_service, settings) return root_service def setupReceivers(root_service, settings): from carbon.protocols import MetricReceiver for plugin_name, plugin_class in MetricReceiver.plugins.items(): plugin_class.build(root_service) def setupAggregatorProcessor(root_service, settings): from carbon.aggregator.processor import AggregationProcessor # Register the plugin class from carbon.aggregator.rules import RuleManager aggregation_rules_path = settings["aggregation-rules"] if not exists(aggregation_rules_path): raise CarbonConfigException("aggregation processor: file does not exist {0}".format(aggregation_rules_path)) RuleManager.read_from(aggregation_rules_path) def setupRewriterProcessor(root_service, settings): from carbon.rewrite import RewriteRuleManager rewrite_rules_path = settings["rewrite-rules"] RewriteRuleManager.read_from(rewrite_rules_path) def setupRelayProcessor(root_service, settings): from carbon.routers import DatapointRouter from carbon.client import CarbonClientManager router_class = DatapointRouter.plugins[settings.RELAY_METHOD] router = router_class(settings) state.client_manager = CarbonClientManager(router) state.client_manager.setServiceParent(root_service) for destination in util.parseDestinations(settings.DESTINATIONS): state.client_manager.startClient(destination) def setupWriterProcessor(root_service, settings): from carbon import cache # Register CacheFeedingProcessor from carbon.protocols import CacheManagementHandler from carbon.writer import WriterService from carbon import events factory = ServerFactory() factory.protocol = CacheManagementHandler service = TCPServer( 
settings.CACHE_QUERY_PORT, factory, interface=settings.CACHE_QUERY_INTERFACE) service.setServiceParent(root_service) writer_service = WriterService() writer_service.setServiceParent(root_service) if settings.USE_FLOW_CONTROL: events.cacheFull.addHandler(events.pauseReceivingMetrics) events.cacheSpaceAvailable.addHandler(events.resumeReceivingMetrics) carbon-1.0.2/lib/carbon/amqp_publisher.py0000644000000000000000000000754413131244450020322 0ustar rootroot00000000000000#!/usr/bin/env python """ Copyright 2009 Lucio Torre Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Will publish metrics over AMQP """ import os import time from optparse import OptionParser from twisted.internet.defer import inlineCallbacks from twisted.internet import reactor, task from twisted.internet.protocol import ClientCreator from txamqp.protocol import AMQClient from txamqp.client import TwistedDelegate from txamqp.content import Content import txamqp.spec @inlineCallbacks def writeMetric(metric_path, value, timestamp, host, port, username, password, vhost, exchange, spec=None, channel_number=1, ssl=False): if not spec: spec = txamqp.spec.load(os.path.normpath( os.path.join(os.path.dirname(__file__), 'amqp0-8.xml'))) delegate = TwistedDelegate() connector = ClientCreator(reactor, AMQClient, delegate=delegate, vhost=vhost, spec=spec) if ssl: from twisted.internet.ssl import ClientContextFactory conn = yield connector.connectSSL(host, port, ClientContextFactory()) else: conn = yield connector.connectTCP(host, port) yield conn.authenticate(username, password) channel = yield conn.channel(channel_number) yield channel.channel_open() yield channel.exchange_declare(exchange=exchange, type="topic", durable=True, auto_delete=False) message = Content( "%f %d" % (value, timestamp) ) message["delivery mode"] = 2 channel.basic_publish(exchange=exchange, content=message, routing_key=metric_path) yield channel.channel_close() def main(): parser = OptionParser(usage="%prog [options] [timestamp]") parser.add_option("-t", "--host", dest="host", help="host name", metavar="HOST", default="localhost") parser.add_option("-p", "--port", dest="port", type=int, help="port number", metavar="PORT", default=5672) parser.add_option("-u", "--user", dest="username", help="username", metavar="USERNAME", default="guest") parser.add_option("-w", "--password", dest="password", help="password", metavar="PASSWORD", default="guest") parser.add_option("-v", "--vhost", dest="vhost", help="vhost", metavar="VHOST", default="/") parser.add_option("-s", "--ssl", dest="ssl", help="ssl", metavar="SSL", action="store_true", default=False) parser.add_option("-e", "--exchange", dest="exchange", help="exchange", metavar="EXCHANGE", default="graphite") (options, args) = parser.parse_args() try: metric_path = args[0] value = float(args[1]) if len(args) > 2: timestamp = int(args[2]) else: timestamp = time.time() except ValueError: parser.print_usage() raise SystemExit(1) d = writeMetric(metric_path, value, timestamp, options.host, options.port, options.username, options.password, vhost=options.vhost, 
exchange=options.exchange, ssl=options.ssl) d.addErrback(lambda f: f.printTraceback()) d.addBoth(lambda _: reactor.stop()) reactor.run() if __name__ == "__main__": main() carbon-1.0.2/lib/carbon/util.py0000644000000000000000000002143113131244455016260 0ustar rootroot00000000000000import sys import os import pwd import __builtin__ from os.path import abspath, basename, dirname try: from cStringIO import StringIO except ImportError: from StringIO import StringIO try: import cPickle as pickle USING_CPICKLE = True except ImportError: import pickle USING_CPICKLE = False from time import sleep, time from twisted.python.util import initgroups from twisted.scripts.twistd import runApp def dropprivs(user): uid, gid = pwd.getpwnam(user)[2:4] initgroups(uid, gid) os.setregid(gid, gid) os.setreuid(uid, uid) return (uid, gid) def run_twistd_plugin(filename): from carbon.conf import get_parser from twisted.scripts.twistd import ServerOptions bin_dir = dirname(abspath(filename)) root_dir = dirname(bin_dir) os.environ.setdefault('GRAPHITE_ROOT', root_dir) program = basename(filename).split('.')[0] # First, parse command line options as the legacy carbon scripts used to # do. parser = get_parser(program) (options, args) = parser.parse_args() if not args: parser.print_usage() return # This isn't as evil as you might think __builtin__.instance = options.instance __builtin__.program = program # Then forward applicable options to either twistd or to the plugin itself. twistd_options = ["--no_save"] # If no reactor was selected yet, try to use the epoll reactor if # available. try: from twisted.internet import epollreactor twistd_options.append("--reactor=epoll") except ImportError: pass if options.debug or options.nodaemon: twistd_options.extend(["--nodaemon"]) if options.profile: twistd_options.extend(["--profile", options.profile]) if options.profiler: twistd_options.extend(["--profiler", options.profiler]) if options.pidfile: twistd_options.extend(["--pidfile", options.pidfile]) if options.umask: twistd_options.extend(["--umask", options.umask]) if options.syslog: twistd_options.append("--syslog") # Now for the plugin-specific options. twistd_options.append(program) if options.debug: twistd_options.append("--debug") for option_name, option_value in vars(options).items(): if (option_value is not None and option_name not in ("debug", "profile", "profiler", "pidfile", "umask", "nodaemon", "syslog")): twistd_options.extend(["--%s" % option_name.replace("_", "-"), option_value]) # Finally, append extra args so that twistd has a chance to process them. twistd_options.extend(args) config = ServerOptions() config.parseOptions(twistd_options) runApp(config) def parseDestination(dest_string): s = dest_string.strip() bidx = s.rfind(']:') # find closing bracket and following colon. cidx = s.find(':') if s.startswith('[') and bidx is not None: server = s[1:bidx] port = s[bidx + 2:] elif cidx is not None: server = s[:cidx] port = s[cidx + 1:] else: raise ValueError("Invalid destination string \"%s\"" % dest_string) if ':' in port: port, _, instance = port.partition(':') else: instance = None return server, int(port), instance def parseDestinations(destination_strings): return [parseDestination(dest_string) for dest_string in destination_strings] # Yes this is duplicated in whisper. Yes, duplication is bad. # But the code is needed in both places and we do not want to create # a dependency on whisper especiaily as carbon moves toward being a more # generic storage service that can use various backends. 
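# ---------------------------------------------------------------------------
# Illustrative sketch (not part of carbon): worked examples of the retention
# strings handled by getUnitString()/parseRetentionDef() just below. A
# retention definition is "<precision>:<points>"; either side may be a bare
# integer or an integer with a unit suffix from UnitMultipliers. The sample
# strings are made up for demonstration only, and this helper is never called
# by carbon itself.
def _retention_examples():
  # '60:1440' -> (60, 1440)    bare integers: 60-second precision, 1440 points
  # '60s:1d'  -> (60, 1440)    86400 seconds / 60-second precision = 1440 points
  # '1h:7d'   -> (3600, 168)   604800 seconds / 3600-second precision = 168 points
  return [parseRetentionDef('60:1440'),
          parseRetentionDef('60s:1d'),
          parseRetentionDef('1h:7d')]
# ---------------------------------------------------------------------------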
UnitMultipliers = { 's' : 1, 'm' : 60, 'h' : 60 * 60, 'd' : 60 * 60 * 24, 'w' : 60 * 60 * 24 * 7, 'y' : 60 * 60 * 24 * 365, } def getUnitString(s): if s not in UnitMultipliers: raise ValueError("Invalid unit '%s'" % s) return s def parseRetentionDef(retentionDef): import re (precision, points) = retentionDef.strip().split(':') if precision.isdigit(): precision = int(precision) * UnitMultipliers[getUnitString('s')] else: precision_re = re.compile(r'^(\d+)([a-z]+)$') match = precision_re.match(precision) if match: precision = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))] else: raise ValueError("Invalid precision specification '%s'" % precision) if points.isdigit(): points = int(points) else: points_re = re.compile(r'^(\d+)([a-z]+)$') match = points_re.match(points) if match: points = int(match.group(1)) * UnitMultipliers[getUnitString(match.group(2))] / precision else: raise ValueError("Invalid retention specification '%s'" % points) return (precision, points) # This whole song & dance is due to pickle being insecure # yet performance critical for carbon. We leave the insecure # mode (which is faster) as an option (USE_INSECURE_UNPICKLER). # The SafeUnpickler classes were largely derived from # http://nadiana.com/python-pickle-insecure if USING_CPICKLE: class SafeUnpickler(object): PICKLE_SAFE = { 'copy_reg': set(['_reconstructor']), '__builtin__': set(['object']), } @classmethod def find_class(cls, module, name): if not module in cls.PICKLE_SAFE: raise pickle.UnpicklingError('Attempting to unpickle unsafe module %s' % module) __import__(module) mod = sys.modules[module] if not name in cls.PICKLE_SAFE[module]: raise pickle.UnpicklingError('Attempting to unpickle unsafe class %s' % name) return getattr(mod, name) @classmethod def loads(cls, pickle_string): pickle_obj = pickle.Unpickler(StringIO(pickle_string)) pickle_obj.find_global = cls.find_class return pickle_obj.load() else: class SafeUnpickler(pickle.Unpickler): PICKLE_SAFE = { 'copy_reg': set(['_reconstructor']), '__builtin__': set(['object']), } def find_class(self, module, name): if not module in self.PICKLE_SAFE: raise pickle.UnpicklingError('Attempting to unpickle unsafe module %s' % module) __import__(module) mod = sys.modules[module] if not name in self.PICKLE_SAFE[module]: raise pickle.UnpicklingError('Attempting to unpickle unsafe class %s' % name) return getattr(mod, name) @classmethod def loads(cls, pickle_string): return cls(StringIO(pickle_string)).load() def get_unpickler(insecure=False): if insecure: return pickle else: return SafeUnpickler class TokenBucket(object): '''This is a basic tokenbucket rate limiter implementation for use in enforcing various configurable rate limits''' def __init__(self, capacity, fill_rate): '''Capacity is the total number of tokens the bucket can hold, fill rate is the rate in tokens (or fractional tokens) to be added to the bucket per second.''' self.capacity = float(capacity) self._tokens = float(capacity) self.fill_rate = float(fill_rate) self.timestamp = time() def drain(self, cost, blocking=False): '''Given a number of tokens (or fractions) drain will return True and drain the number of tokens from the bucket if the capacity allows, otherwise we return false and leave the contents of the bucket.''' if cost <= self.tokens: self._tokens -= cost return True else: if blocking: tokens_needed = cost - self._tokens seconds_per_token = 1 / self.fill_rate seconds_left = seconds_per_token * tokens_needed time_to_sleep = self.timestamp + seconds_left - time() if 
time_to_sleep > 0: sleep(time_to_sleep) self._tokens -= cost return True return False def setCapacityAndFillRate(self, new_capacity, new_fill_rate): delta = float(new_capacity) - self.capacity self.capacity = float(new_capacity) self.fill_rate = float(new_fill_rate) self._tokens = delta + self._tokens @property def tokens(self): '''The tokens property will return the current number of tokens in the bucket.''' if self._tokens < self.capacity: now = time() delta = self.fill_rate * (now - self.timestamp) self._tokens = min(self.capacity, self._tokens + delta) self.timestamp = now return self._tokens class PluginRegistrar(type): """Clever subclass detection hack that makes plugin loading trivial. To use this, define an abstract base class for plugin implementations that defines the plugin API. Give that base class a __metaclass__ of PluginRegistrar, and define a 'plugins = {}' class member. Subclasses defining a 'plugin_name' member will then appear in the plugins dict. """ def __init__(classObj, name, bases, members): super(PluginRegistrar, classObj).__init__(name, bases, members) if hasattr(classObj, 'plugin_name'): classObj.plugins[classObj.plugin_name] = classObj carbon-1.0.2/lib/twisted/0000755000000000000000000000000013131244747015153 5ustar rootroot00000000000000carbon-1.0.2/lib/twisted/plugins/0000755000000000000000000000000013131244747016634 5ustar rootroot00000000000000carbon-1.0.2/lib/twisted/plugins/carbon_aggregator_plugin.py0000644000000000000000000000125313131244455024227 0ustar rootroot00000000000000from zope.interface import implements from twisted.plugin import IPlugin from twisted.application.service import IServiceMaker from carbon import conf class CarbonAggregatorServiceMaker(object): implements(IServiceMaker, IPlugin) tapname = "carbon-aggregator" description = "Aggregate stats for graphite." options = conf.CarbonAggregatorOptions def makeService(self, options): """ Construct a C{carbon-aggregator} service. """ from carbon import service return service.createAggregatorService(options) # Now construct an object which *provides* the relevant interfaces serviceMaker = CarbonAggregatorServiceMaker() carbon-1.0.2/lib/twisted/plugins/carbon_relay_plugin.py0000644000000000000000000000121113131244455023213 0ustar rootroot00000000000000from zope.interface import implements from twisted.plugin import IPlugin from twisted.application.service import IServiceMaker from carbon import conf class CarbonRelayServiceMaker(object): implements(IServiceMaker, IPlugin) tapname = "carbon-relay" description = "Relay stats for graphite." options = conf.CarbonRelayOptions def makeService(self, options): """ Construct a C{carbon-relay} service. """ from carbon import service return service.createRelayService(options) # Now construct an object which *provides* the relevant interfaces serviceMaker = CarbonRelayServiceMaker() carbon-1.0.2/lib/twisted/plugins/carbon_cache_plugin.py0000644000000000000000000000121313131244455023144 0ustar rootroot00000000000000from zope.interface import implements from twisted.plugin import IPlugin from twisted.application.service import IServiceMaker from carbon import conf class CarbonCacheServiceMaker(object): implements(IServiceMaker, IPlugin) tapname = "carbon-cache" description = "Collect stats for graphite." options = conf.CarbonCacheOptions def makeService(self, options): """ Construct a C{carbon-cache} service. 
""" from carbon import service return service.createCacheService(options) # Now construct an object which *provides* the relevant interfaces serviceMaker = CarbonCacheServiceMaker() carbon-1.0.2/setup.cfg0000644000000000000000000000023513131244747014543 0ustar rootroot00000000000000[bdist_rpm] requires = python-twisted whisper post-install = distro/redhat/misc/postinstall [install] prefix = /opt/graphite install-lib = %(prefix)s/lib carbon-1.0.2/MANIFEST.in0000644000000000000000000000021513131244450014445 0ustar rootroot00000000000000recursive-include conf/ * recursive-include distro/ * exclude conf/*.conf include LICENSE include lib/carbon/amqp0-8.xml include MANIFEST.in