pax_global_header 0000666 0000000 0000000 00000000064 12474406655 0014527 g ustar 00root root 0000000 0000000 52 comment=0aced26eb050ceb98ee9d5d6cdca8db448666986
scales-1.0.9/ 0000775 0000000 0000000 00000000000 12474406655 0013010 5 ustar 00root root 0000000 0000000 scales-1.0.9/.gitignore 0000664 0000000 0000000 00000000032 12474406655 0014773 0 ustar 00root root 0000000 0000000 *.egg/
build/
dist/
venv/
scales-1.0.9/.travis.yml 0000664 0000000 0000000 00000000274 12474406655 0015124 0 ustar 00root root 0000000 0000000 language: python
python:
- "2.7"
env:
- TOX_ENV=py27
- TOX_ENV=pypy
- TOX_ENV=py32
- TOX_ENV=py33
- TOX_ENV=py34
install:
- pip install tox
script:
- tox -e $TOX_ENV
scales-1.0.9/AUTHORS 0000664 0000000 0000000 00000000057 12474406655 0014062 0 ustar 00root root 0000000 0000000 Greplin
Joe Shaw (@joeshaw)
Adam (@NorthIsUp)
scales-1.0.9/LICENSE 0000664 0000000 0000000 00000026136 12474406655 0014025 0 ustar 00root root 0000000 0000000
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
scales-1.0.9/README.md 0000664 0000000 0000000 00000015136 12474406655 0014275 0 ustar 00root root 0000000 0000000 scales - Metrics for Python
===========================
[![Build Status](https://travis-ci.org/Cue/scales.svg?branch=master)](https://travis-ci.org/Cue/scales)
Tracks server state and statistics, allowing you to see what your server is
doing. It can also send metrics to Graphite for graphing or to a file for crash forensics.
scales is inspired by the fantastic [metrics](https://github.com/codahale/metrics) library, though it is by
no means a port.
This is a brand new release - issue reports and pull requests are very much appreciated!
### Installation
You can get a release from PyPI:
pip install scales
Or you can get it from GitHub:
git clone https://github.com/Cue/scales
cd scales
python setup.py install
The HTTP statistics viewer in scales requires one of the following web frameworks:
[Flask](http://flask.pocoo.org/)
[Tornado](http://www.tornadoweb.org/)
[Twisted](http://twistedmatrix.com/trac/)
If you aren't sure, go with Flask; it's compatible with almost every other event
loop. You can get it with `pip install flask`.
Scales is tested with Python 2.7 and 3.3. For some reason it does not
work with PyPy; pull requests for this are welcome, if you can figure
out what's up.
### How to use it
Getting started and adding stats only takes a few lines of code:
```python
from greplin import scales
STATS = scales.collection('/web',
scales.IntStat('errors'),
scales.IntStat('success'))
# In a request handler
STATS.success += 1
```
This code will collect two integer stats, which is nice, but what you really
want to do is *look* at those stats, to get insight into what your server is
doing. There are two main ways of doing this: the HTTP server and Graphite
logging.
The HTTP server is the simplest way to get stats out of a running server. The
easiest way, if you have Flask installed, is to do this:
```python
import greplin.scales.flaskhandler as statserver
statserver.serveInBackground(8765, serverName='something-server-42')
```
This will spawn a background thread that will listen on port 8765, and serve up
a very convenient view of all your stats. To see it, go to
http://localhost:8765/status/
You can also get the stats in JSON by appending `?format=json` to the
URL. `?format=prettyjson` is the same thing, but pretty-printed.
The HTTP server is good for doing spot checks on the internals of running
servers, but what about continuous monitoring? How do you generate graphs of
stats over time? This is where [Graphite](http://graphite.wikidot.com/) comes
in. Graphite is a server for collecting stats and graphing them, and scales has
easy support for using it. Again, this is handled in a background thread:
```python
graphitePeriodicPusher = graphite.GraphitePeriodicPusher('graphite-collector-hostname', 2003, 'my.server.prefix.')
graphitePeriodicPusher.allow("*") # Logs everything to graphite
graphitePeriodicPusher.start()
```
That's it! Numeric stats will now be pushed to Graphite every minute.
Note that, by default, if you don't use `allow`, nothing is logged to graphite.
You can also exclude stats from graphite logging with the `forbid(prefix)` method
of the `GraphitePeriodicPusher` class.
#### Timing sections of code
To better understand the performance of certain critical sections of your code,
scales lets you collect timing information:
```python
from greplin import scales
STATS = scales.collection('/web',
scales.IntStat('errors'),
scales.IntStat('success'),
scales.PmfStat('latency'))
# In a request handler
with STATS.latency.time():
do_something_expensive()
```
This will collect statistics on the running times of that section of code: mean
time, median, standard deviation, and several percentiles to help you locate
outlier times. This happens in pretty small constant memory, so don't worry
about the cost; time anything you like.
You can gather this same kind of sample statistics about any quantity. Just make
a `PmfStat` and assign new values to it:
```python
for person in people:
person.perturb(42)
STATS.wistfulness = person.getFeelings('wistfulness')
```
#### Metering Rates
Scales can track 1/5/15 minute averages with `MeterStat`:
```python
from greplin.scales.meter import MeterStat
STATS = scales.collection('/web', MeterStat('hits'))
def handleRequest(..):
STATS.hits.mark() # or .mark(NUMBER), or STATS.hits = NUMBER
```
#### Class Stats
While global stats are easy to use, sometimes making stats class-based makes
more sense. This is supported; just make sure to give each instance of the class
a unique identifier with `scales.init`.
```python
class Handler(object):
requests = scales.IntStat('requests')
latency = scales.PmfStat('latency')
byPath = scales.IntDictStat('byPath')
def __init__(self):
scales.init(self, '/handler')
def handleRequest(self, request):
with self.latency.time():
doSomething()
self.requests += 1
self.byPath[request.path] += 1
```
#### Gauges
Simple lambdas can be used to generate stat values.
```python
STATS = scales.collection(scales.Stat('currentTime', lambda: time.time()))
```
Of course this works with arbitrary function objects, so the example above could
also be written:
```python
STATS = scales.collection(scales.Stat('currentTime', time.time))
```
#### Hierarchical Stats + Aggregation
Stats can inherit their path from the object that creates them, and (non-gauge) stats can be aggregated up to ancestors.
```python
class Processor(object):
"""Example processing management object."""
threadStates = scales.HistogramAggregationStat('state')
finished = scales.SumAggregationStat('finished')
def __init__(self):
scales.init(self, '/processor')
self.threads = 0
def createThread(self):
threadId = self.threads
self.threads += 1
SomeThread(threadId).start()
class SomeThread(object):
"""Stub of a processing thread object."""
state = scales.Stat('state')
finished = scales.IntStat('finished')
def __init__(self, threadId):
scales.initChild(self, 'thread-%d' % threadId)
def processingLoop(self):
while True:
self.state = 'waitingForTask'
getTask()
self.state = 'performingTask'
doTask()
self.finished += 1
```
This will result in a stat at the path `/processor/finished` which counts the
total of the `finished` stats in each `SomeThread` object, as well as per-object
stats with paths like `/processor/thread-0/finished`. There will also be stats
like `/processor/state/waitingForTask` which aggregates the number of threads in
the `waitingForTask` state.
### Authors
[Greplin, Inc.](http://www.greplin.com)
### License
Copyright 2011 The scales Authors.
Published under The Apache License, see LICENSE
scales-1.0.9/requirements-optional.txt 0000664 0000000 0000000 00000000071 12474406655 0020115 0 ustar 00root root 0000000 0000000 Flask==0.9
Twisted==10.2.0
bottle==0.11.6
tornado==2.2.1
scales-1.0.9/requirements.txt 0000664 0000000 0000000 00000000013 12474406655 0016266 0 ustar 00root root 0000000 0000000 six==1.5.2
scales-1.0.9/setup.py 0000775 0000000 0000000 00000003254 12474406655 0014531 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
# Copyright 2011 The scales Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Setup script for scales."""
# Prefer setuptools (supports install_requires, namespace packages, etc.);
# fall back to the stdlib distutils when setuptools is unavailable.
try:
  from setuptools import setup
except ImportError:
  from distutils.core import setup


setup(name='scales',
      version='1.0.9',
      description='Stats for Python processes',
      license='Apache',
      author='Greplin, Inc.',
      author_email='opensource@greplin.com',
      url='https://www.github.com/Cue/scales',
      install_requires=[
          'six',  # py2/py3 compatibility shim used throughout the package
      ],
      # Sources live under src/, exposed as the 'greplin' namespace package.
      package_dir={'':'src'},
      packages=[
          'greplin',
          'greplin.scales',
      ],
      namespace_packages=[
          'greplin',
      ],
      tests_require=[
          'nose',
      ],
      test_suite = 'nose.collector',
      zip_safe = True,
      classifiers=[
          'Programming Language :: Python',
          'Programming Language :: Python :: 2',
          'Programming Language :: Python :: 2.6',
          'Programming Language :: Python :: 2.7',
          'Programming Language :: Python :: 3',
          'Programming Language :: Python :: 3.2',
          'Programming Language :: Python :: 3.3',
          'Programming Language :: Python :: 3.4',
      ],
     )
scales-1.0.9/src/ 0000775 0000000 0000000 00000000000 12474406655 0013577 5 ustar 00root root 0000000 0000000 scales-1.0.9/src/.gitignore 0000664 0000000 0000000 00000000021 12474406655 0015560 0 ustar 00root root 0000000 0000000 scales.egg-info/
scales-1.0.9/src/greplin/ 0000775 0000000 0000000 00000000000 12474406655 0015237 5 ustar 00root root 0000000 0000000 scales-1.0.9/src/greplin/.gitignore 0000664 0000000 0000000 00000000012 12474406655 0017220 0 ustar 00root root 0000000 0000000 *.py[cod]
scales-1.0.9/src/greplin/__init__.py 0000664 0000000 0000000 00000001251 12474406655 0017347 0 ustar 00root root 0000000 0000000 # Copyright 2011 The scales Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The Greplin root package."""
# Declare 'greplin' as a namespace package so independently installed
# distributions can all contribute subpackages under the 'greplin' name.
import pkg_resources
pkg_resources.declare_namespace('greplin')
scales-1.0.9/src/greplin/scales/ 0000775 0000000 0000000 00000000000 12474406655 0016511 5 ustar 00root root 0000000 0000000 scales-1.0.9/src/greplin/scales/.gitignore 0000664 0000000 0000000 00000000020 12474406655 0020471 0 ustar 00root root 0000000 0000000 *.bak
*.py[cod]
scales-1.0.9/src/greplin/scales/__init__.py 0000664 0000000 0000000 00000044630 12474406655 0020631 0 ustar 00root root 0000000 0000000 # Copyright 2011 The scales Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes for tracking system statistics."""
import collections
import inspect
import itertools
import gc
import six
import unittest
import json
import time
from contextlib import contextmanager
try:
from UserDict import UserDict
except ImportError:
from collections import UserDict
from greplin.scales.samplestats import ExponentiallyDecayingReservoir
ID_KEY = '__STATS__id'
NEXT_ID = itertools.count()
def statsId(obj):
  """Returns a stable unique ID for the given object, assigning one on first use."""
  try:
    # Already tagged: reuse the id stored on the object.
    return getattr(obj, ID_KEY)
  except AttributeError:
    assigned = next(NEXT_ID)
    setattr(obj, ID_KEY, assigned)
    return assigned
def init(obj, context=None):
  """Initializes stats collection in the given object at the given context path. Defaults to root-level stats."""
  return _Stats.init(obj, context)


def initChild(obj, name):
  """Initializes stats collection in the given object as a child of the object that created it."""
  # Empty subContext means the child path is just `name` (no autoincrement suffix).
  return _Stats.initChild(obj, name, '')


def initChildOfType(obj, name, subContext=None):
  """Initializes stats collection in the given object as a child of the object that created it.

  With subContext=None the child gets an autoincrementing numeric suffix.
  """
  return _Stats.initChild(obj, name, subContext)


def reset():
  """Resets stats state - should only be called from tests."""
  _Stats.reset()


def getStats():
  """Gets the root stats dict."""
  return _Stats.stats


def setCollapsed(path):
  """Sets the stat container at the given path as collapsed by default in the UI."""
  return _Stats.setCollapsed(path)
class StatContainer(UserDict):
  """A dict of stats that also carries display configuration for viewers."""

  def __init__(self):
    UserDict.__init__(self)
    # Whether viewers should render this container collapsed by default.
    self.__displayCollapsed = False

  def setCollapsed(self, isCollapsed):
    """Marks whether this container displays as collapsed by default."""
    self.__displayCollapsed = isCollapsed

  def isCollapsed(self):
    """Returns whether this container displays as collapsed by default."""
    return self.__displayCollapsed
class _Stats(object):
  """Static class for stats aggregation: holds the global stats tree and object maps."""

  # Root of the stats tree.
  stats = StatContainer()
  # Maps object stats-id -> parent object (used for aggregation lookups).
  parentMap = {}
  # Maps object stats-id -> StatContainer holding that object's stats.
  containerMap = {}
  # Autoincrementing id used when a child subContext is not given.
  subId = 0

  @classmethod
  def reset(cls):
    """Resets the static state. Should only be called by tests."""
    cls.stats = StatContainer()
    cls.parentMap = {}
    cls.containerMap = {}
    cls.subId = 0
    # Clear cached aggregators on every live Stat so stale parents aren't reused.
    for stat in gc.get_objects():
      if isinstance(stat, Stat):
        stat._aggregators = {}

  @classmethod
  def init(cls, obj, context):
    """Implementation of init: returns (creating if needed) the container for obj at context."""
    addr = statsId(obj)
    if addr not in cls.containerMap:
      cls.containerMap[addr] = cls.__getStatContainer(context)
    return cls.containerMap[addr]

  @classmethod
  def __getSelf(cls, frame):
    """Extracts the self object out of a stack frame."""
    return inspect.getargvalues(frame).locals.get('self', None)

  @classmethod
  def initChild(cls, obj, name, subContext, parent = None):
    """Implementation of initChild."""
    addr = statsId(obj)
    if addr not in cls.containerMap:
      if not parent:
        # Find out the parent of the calling object by going back through the call stack until a self != this.
        f = inspect.currentframe()
        while not cls.__getSelf(f):
          f = f.f_back
        this = cls.__getSelf(f)
        f = f.f_back
        while cls.__getSelf(f) == this or not cls.__getSelf(f):
          f = f.f_back
        parent = cls.__getSelf(f)

      # Default subcontext to an autoincrementing ID.
      if subContext is None:
        cls.subId += 1
        subContext = cls.subId

      # BUGFIX: was `subContext is not ''` — an identity comparison with a
      # string literal, which is implementation-dependent (and a
      # SyntaxWarning on modern CPython). Equality is what is meant here.
      if subContext != '':
        path = '%s/%s' % (name, subContext)
      else:
        path = name

      # Now that we have the name, create an entry for this object.
      cls.parentMap[addr] = parent
      container = cls.getContainerForObject(statsId(parent))
      # Objects created directly by a TestCase get a synthetic root container.
      if not container and isinstance(parent, unittest.TestCase):
        cls.init(parent, '/test-case')
      cls.containerMap[addr] = cls.__getStatContainer(path, cls.getContainerForObject(statsId(parent)))
    return cls.containerMap[addr]

  @classmethod
  def __getStatContainer(cls, context, parent=None):
    """Get the stat container for the given context under the given parent (root if None)."""
    container = parent
    if container is None:
      container = cls.stats
    if context is not None:
      context = str(context).lstrip('/')
      # Walk/create one StatContainer per path segment.
      for key in context.split('/'):
        container.setdefault(key, StatContainer())
        container = container[key]
    return container

  @classmethod
  def getContainerForObject(cls, instanceId):
    """Get the stat container for the given object id, or None if uninitialized."""
    return cls.containerMap.get(instanceId, None)

  @classmethod
  def getStat(cls, obj, name):
    """Gets the stat for the given object with the given name, or None if no such stat exists."""
    objClass = type(obj)
    for theClass in objClass.__mro__:
      if theClass == object:
        break
      for value in theClass.__dict__.values():
        if isinstance(value, Stat) and value.getName() == name:
          return value

  @classmethod
  def getAggregator(cls, instanceId, name):
    """Walks up the parent chain looking for an aggregating stat with the given name."""
    parent = cls.parentMap.get(instanceId)
    while parent:
      stat = cls.getStat(parent, name)
      if stat:
        return stat, parent
      parent = cls.parentMap.get(statsId(parent))

  @classmethod
  def setCollapsed(cls, path):
    """Collapses the container at the given path (creating it if needed)."""
    cls.__getStatContainer(path).setCollapsed(True)
class Stat(object):
  """Basic stat value class.

  A Stat is a descriptor: reading it on an instance returns the current value
  from that instance's stat container (creating the default lazily), and
  assigning to it stores a new value, first propagating the change to any
  ancestor aggregation stat of the same name.
  """

  def __init__(self, name, value='', logger = None):
    self.__name = name        # key under which the value is stored in the container
    self.__default = value    # value used before the first assignment
    self._logger = logger     # optional callable receiving textual update messages
    self._aggregators = {}    # instanceId -> (aggregator stat, parent) or None, cached

  def getName(self):
    """Gets the name of the stat."""
    return self.__name

  def __get__(self, instance, _):
    # Descriptor read: look up (lazily creating) this stat's entry in the
    # instance's container.
    container = _Stats.getContainerForObject(statsId(instance))
    if self.__name not in container:
      container[self.__name] = self._getDefault(instance)
    return container[self.__name]

  def _getInit(self):
    """Internal method to return the initial value for a stat that is never set."""
    return self.__default

  def _getDefault(self, _):
    """Internal method to return the default for a stat that hasn't stored a value yet."""
    return self.__default

  def _aggregate(self, instanceId, container, value, subKey = None):
    """Performs stat aggregation.

    Must be called before the new value is stored so the aggregator can see
    the old value.
    """
    # Get the aggregator (cached per instance; may be None when no ancestor
    # declares an aggregation stat with this name).
    if instanceId not in self._aggregators:
      self._aggregators[instanceId] = _Stats.getAggregator(instanceId, self.__name)
    aggregator = self._aggregators[instanceId]

    # If we are aggregating, get the old value.
    if aggregator:
      oldValue = container.get(self.__name)
      if subKey:
        oldValue = oldValue[subKey]
        aggregator[0].update(aggregator[1], oldValue, value, subKey)
      else:
        aggregator[0].update(aggregator[1], oldValue, value)

  def __set__(self, instance, value):
    # Descriptor write: aggregate first (old value still visible), then store.
    instanceId = statsId(instance)
    container = _Stats.getContainerForObject(instanceId)
    self._aggregate(instanceId, container, value)
    container[self.__name] = value
    if self._logger:
      self._logger('Updated stat "%s" with value: %s' % (self.__name, value))

  def updateItem(self, instance, subKey, value):
    """Updates a child value. Must be called before the update has actually occurred."""
    instanceId = statsId(instance)
    container = _Stats.getContainerForObject(instanceId)
    self._aggregate(instanceId, container, value, subKey)

  def logger(self, logger):
    """Log textual updates about this value to the given function. Returns self for chaining."""
    self._logger = logger
    return self
class IntStat(Stat):
  """A Stat whose value is an integer, defaulting to zero."""

  def __init__(self, name, value=0):
    super(IntStat, self).__init__(name, value)
class DoubleStat(Stat):
  """A Stat whose value is a float, defaulting to zero."""

  def __init__(self, name, value=0.0):
    super(DoubleStat, self).__init__(name, value)
class IntDict(UserDict):
  """Mapping of keys to integer counters; missing keys read as 0.

  Every write is reported to the owning stat (for ancestor aggregation)
  before being stored.  With autoDelete enabled, storing a falsy value
  removes the key instead of keeping a zero entry.
  """

  def __init__(self, parent, instance, autoDelete=False):
    UserDict.__init__(self)
    self.parent = parent
    self.instance = instance
    self.autoDelete = autoDelete

  def __getitem__(self, item):
    # Missing keys read as zero rather than raising KeyError.
    if item not in self:
      return 0
    return UserDict.__getitem__(self, item)

  def __setitem__(self, key, value):
    # Tell the owning stat first, while the old value is still in place.
    self.parent.updateItem(self.instance, key, value)
    if not value and self.autoDelete:
      if UserDict.__contains__(self, key):
        UserDict.__delitem__(self, key)
    else:
      UserDict.__setitem__(self, key, value)
class IntDictStat(Stat):
  """A Stat whose value is an IntDict of per-key integer counters."""

  def __init__(self, name, autoDelete = False):
    super(IntDictStat, self).__init__(name)
    self.autoDelete = autoDelete

  def _getDefault(self, instance):
    # Each owning instance gets its own dict, wired back to this stat so
    # writes can be aggregated.
    return IntDict(self, instance, self.autoDelete)
class StringDict(UserDict):
  """Mapping of keys to strings; missing keys read as the empty string.

  Every write is reported to the owning stat before being stored.
  """

  def __init__(self, parent, instance):
    UserDict.__init__(self)
    self.parent = parent
    self.instance = instance

  def __getitem__(self, item):
    # Missing keys read as '' rather than raising KeyError.
    if item not in self:
      return ''
    return UserDict.__getitem__(self, item)

  def __setitem__(self, key, value):
    # Tell the owning stat first, while the old value is still in place.
    self.parent.updateItem(self.instance, key, value)
    UserDict.__setitem__(self, key, value)
class StringDictStat(Stat):
  """A Stat whose value is a StringDict of per-key strings."""

  def _getDefault(self, instance):
    # Each owning instance gets its own dict, wired back to this stat.
    return StringDict(self, instance)
class AggregationStat(Stat):
  """Abstract base for stats that roll up values from child objects' stats."""

  def __init__(self, name, value):
    super(AggregationStat, self).__init__(name, value)

  def update(self, instance, oldValue, newValue):
    """Recomputes the aggregate given a child stat changing from oldValue to newValue."""
    raise NotImplementedError
class ChildAggregationStat(Stat):
  """Base for stats that roll up sub-values (e.g. dict entries) of child stats."""

  def __init__(self, name, value):
    super(ChildAggregationStat, self).__init__(name, value)

  def update(self, instance, oldValue, newValue, subKey):
    """Recomputes the aggregate given a child sub-value change; no-op by default."""
class SumAggregationStat(AggregationStat):
  """Aggregates child stats by keeping a running sum of their values."""

  def __init__(self, name):
    super(SumAggregationStat, self).__init__(name, 0)

  def update(self, instance, oldValue, newValue):
    """Applies the child's delta (newValue minus the previous value) to the sum."""
    delta = newValue - (oldValue or 0)
    self.__set__(instance, self.__get__(instance, None) + delta)
class HistogramAggregationStat(AggregationStat):
  """Aggregates child stats into a histogram counting each distinct child value."""

  def __init__(self, name, autoDelete = False):
    super(HistogramAggregationStat, self).__init__(name, None)
    self.autoDelete = autoDelete

  def _getDefault(self, _):
    return collections.defaultdict(int)

  def update(self, instance, oldValue, newValue):
    """Moves one count from oldValue's bucket to newValue's bucket."""
    counts = self.__get__(instance, None)
    if oldValue:
      counts[oldValue] -= 1
      # Optionally drop empty buckets so only live values are listed.
      if self.autoDelete and not counts[oldValue]:
        del counts[oldValue]
    if newValue:
      counts[newValue] += 1
class IntDictSumAggregationStat(ChildAggregationStat):
  """Aggregates child int-dict stats into one int dict summing per-key values."""

  def __init__(self, name):
    super(IntDictSumAggregationStat, self).__init__(name, None)

  def _getDefault(self, _):
    return collections.defaultdict(int)

  def update(self, instance, oldValue, newValue, subKey):
    """Applies the per-key delta from a change to one child dict entry."""
    totals = self.__get__(instance, None)
    totals[subKey] += newValue - oldValue
class PmfStatDict(UserDict):
  """Ugly hack defaultdict-like thing.

  Holds a reservoir of samples plus derived summary stats (min/max/mean/
  stddev/percentiles) exposed as dict entries. Missing keys read as 0.0.
  """

  class TimeManager(object):
    """Context manager for timing."""

    def __init__(self, container):
      self.container = container  # owning PmfStatDict that receives the sample
      self.msg99 = None           # optional (logger, msg, args) for slow samples
      self.start = None           # set on __enter__
      self.__discard = False      # when True, drop this sample on exit

    def __enter__(self):
      self.start = time.time()
      return self

    def __exit__(self, *_):
      # Record the elapsed time unless discard() was called. Note that the
      # sample is recorded even when the body raised an exception.
      if not self.__discard:
        latency = time.time() - self.start
        self.container.addValue(latency)
        # percentile99 is the cached value from the last refresh; warn when
        # this sample is at or above it.
        if self.container.percentile99 is not None and latency >= self.container.percentile99:
          if self.msg99 is not None:
            logger, msg, args = self.msg99
            logger.warn(msg, *args)

    def warn99(self, logger, msg, *args):
      """If this time through the timed section of code takes longer
      than the 99th percentile time, then this will call
      logger.warn(msg, *args) at the end of the section."""
      self.msg99 = (logger, msg, args)

    def discard(self):
      """Discard this sample."""
      self.__discard = True

  def __init__(self, sample = None):
    UserDict.__init__(self)
    # The reservoir used for percentile estimation is injectable (for tests).
    if sample:
      self.__sample = sample
    else:
      self.__sample = ExponentiallyDecayingReservoir()
    self.__timestamp = 0      # last time the derived entries were refreshed
    self.percentile99 = None  # cached 99th percentile, read by TimeManager
    self['count'] = 0

  def __getitem__(self, item):
    # Missing keys read as 0.0 instead of raising KeyError.
    if item in self:
      return UserDict.__getitem__(self, item)
    else:
      return 0.0

  def addValue(self, value):
    """Updates the dictionary."""
    self['count'] += 1
    self.__sample.update(value)
    # Recomputing the summary entries is relatively expensive, so do it at
    # most once every 20 seconds, and only once 2+ samples exist.
    if time.time() > self.__timestamp + 20 and len(self.__sample) > 1:
      self.__timestamp = time.time()
      self['min'] = self.__sample.min
      self['max'] = self.__sample.max
      self['mean'] = self.__sample.mean
      self['stddev'] = self.__sample.stddev
      percentiles = self.__sample.percentiles([0.5, 0.75, 0.95, 0.98, 0.99, 0.999])
      self['median'] = percentiles[0]
      self['75percentile'] = percentiles[1]
      self['95percentile'] = percentiles[2]
      self['98percentile'] = percentiles[3]
      self['99percentile'] = percentiles[4]
      self.percentile99 = percentiles[4]
      self['999percentile'] = percentiles[5]

  def time(self):
    """Measure the time this section of code takes. For use in with statements."""
    return self.TimeManager(self)
class PmfStat(Stat):
  """A stat that stores min, max, mean, standard deviation, and some
  percentiles for arbitrary floating-point data. This is potentially a
  bit expensive, so its child values are only updated once every
  twenty seconds."""

  def __init__(self, name, _=None):
    # The ignored second argument keeps the signature compatible with other
    # Stat constructors that take an initial value.
    Stat.__init__(self, name, None)

  def _getDefault(self, _):
    return PmfStatDict()

  def __set__(self, instance, value):
    # Assignment is overloaded: `obj.stat = x` records x as a new sample
    # rather than replacing the underlying PmfStatDict.
    self.__get__(instance, None).addValue(value)
class NamedPmfDict(UserDict):
  """Mapping from name to PmfStatDict, created on first access.

  Assignment is overloaded: `d[name] = x` records x as a sample in the
  named PMF instead of replacing it.
  """

  def __init__(self):
    UserDict.__init__(self)

  def __getitem__(self, item):
    try:
      return UserDict.__getitem__(self, item)
    except KeyError:
      # defaultdict-style: materialize an empty PMF on first access.
      created = PmfStatDict()
      UserDict.__setitem__(self, item, created)
      return created

  def __setitem__(self, key, value):
    self[key].addValue(value)
class NamedPmfDictStat(Stat):
  """Dictionary stat value class. Not compatible with aggregation at this time."""

  def _getDefault(self, _):
    # Lazily create the per-instance name -> PmfStatDict mapping.
    return NamedPmfDict()
class StateTimeStatDict(UserDict):
  """Special dict that tracks time spent in current state."""

  def __init__(self, parent, instance):
    UserDict.__init__(self)
    self.parent = parent      # the owning StateTimeStat descriptor
    self.instance = instance  # the object the stat is attached to

  def __getitem__(self, item):
    # Stored values only cover completed stays in a state; for the current
    # state, add the time elapsed since the last transition so reads are live.
    # NOTE(review): parent.time is None until the first __set__ on the stat;
    # reading the current state's key before any transition would raise —
    # confirm callers always set the state first.
    if item in self:
      value = UserDict.__getitem__(self, item)
    else:
      value = 0.0
    if item is not None and item == self.parent.state:
      return value + (time.time() - self.parent.time)
    else:
      return value

  def incr(self, item, value):
    """Increment a key by the given amount."""
    if item in self:
      old = UserDict.__getitem__(self, item)
    else:
      old = 0.0
    self[item] = old + value

  @contextmanager
  def acquire(self):
    """Assuming that the current state is an integer (it defaults to
    zero), increment it for the duration of the body of the with
    statement."""
    self.parent.__set__(self.instance, self.parent.state + 1)
    try:
      yield
    finally:
      # Decrement even when the body raises.
      self.parent.__set__(self.instance, self.parent.state - 1)
class StateTimeStat(Stat):
  """A stat that stores the amount of time spent in each of a finite
  number of discrete states. This can be used to track things like
  number of concurrent users, connection pool usage, how much time a
  finite state machine spends in each state, or anything like that. To
  use it, just set the stat to the new state every time the state
  changes."""

  def __init__(self, name, _=None):
    Stat.__init__(self, name, None)
    # NOTE(review): the current state and transition timestamp live on the
    # descriptor itself, which Python shares across every instance of the
    # owning class; only the per-state histogram (via _getDefault) is
    # per-instance. Multiple owner instances would interleave state here.
    self.state = 0    # current state value
    self.time = None  # time.time() of the last transition; None until then

  def _getDefault(self, instance):
    return StateTimeStatDict(self, instance)

  def __set__(self, instance, value):
    # Re-setting the current state is a no-op.
    if value == self.state:
      return
    histogram = self.__get__(instance, None)
    now = time.time()
    if self.time is not None:
      # Credit the elapsed time to the state being left.
      histogram.incr(self.state, now - self.time)
    self.state = value
    self.time = now
def filterCollapsedItems(data):
  """Yield (key, value) pairs from data, skipping collapsed stat containers."""
  for key, value in six.iteritems(data):
    hidden = isinstance(value, StatContainer) and value.isCollapsed()
    if not hidden:
      yield (key, value)
class StatContainerEncoder(json.JSONEncoder):
  """JSON encoding that takes in to account collapsed stat containers and stat functions."""

  # pylint: disable=E0202
  def default(self, obj):
    # Stat containers serialize as plain dicts with collapsed children dropped.
    if isinstance(obj, UserDict):
      return dict(filterCollapsedItems(obj.data))
    # Stat functions are invoked and their return value serialized.
    if callable(obj):
      return obj()
    return json.JSONEncoder.default(self, obj)
def dumpStatsTo(filename):
  """Writes the stats dict, stamped with a 'last-updated' time, to filename."""
  with open(filename, 'w') as f:
    latest = getStats()
    latest['last-updated'] = time.time()
    # Dump the dict we just stamped. (Previously this called getStats() a
    # second time and ignored `latest`, which only worked by accident when
    # getStats() returned the same shared container.)
    json.dump(latest, f, cls=StatContainerEncoder)
def collection(path, *stats):
  """Creates a named stats collection object.

  Builds a new class named 'Stats:<path>' whose class attributes are the
  given stat descriptors, instantiates it (the generated __init__ calls
  init(self, path) — presumably registering the instance under `path`;
  confirm against init()), then seeds any stats that declare an initial
  value.
  """
  def initMethod(self):
    """Init method for the underlying stat object's class."""
    init(self, path)
  attributes = {'__init__': initMethod}
  for stat in stats:
    attributes[stat.getName()] = stat
  newClass = type('Stats:%s' % path, (object,), attributes)
  instance = newClass()
  for stat in stats:
    default = stat._getInit() # Consider this method package-protected. # pylint: disable=W0212
    if default:
      setattr(instance, stat.getName(), default)
  return instance
scales-1.0.9/src/greplin/scales/aggregation.py 0000664 0000000 0000000 00000024423 12474406655 0021357 0 ustar 00root root 0000000 0000000 # Copyright 2011 The scales Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for multi-server stat aggregation."""
import datetime
import functools
import json
import os
import re
from collections import defaultdict

import six
class DefaultFormat(object):
  """Reads data points shaped like scales' default aggregate dicts."""

  def getCount(self, data):
    """Number of samples behind this datum (its 'count' field)."""
    count = data['count']
    return count

  def getValue(self, data):
    """Representative value of this datum (its 'average' field)."""
    value = data['average']
    return value
class DirectFormat(object):
  """Reads data points that ARE the value (no wrapping dict)."""

  def getCount(self, _):
    """Each direct datum counts as exactly one sample."""
    return 1

  def getValue(self, data):
    """The datum itself is the value."""
    return data
class TimerFormat(object):
  """A Yammer Metrics Timer datum (value = median duration)."""

  def getCount(self, data):
    """Get the count"""
    assert data['type'] == "timer"
    rate = data['rate']
    return rate['count']

  def getValue(self, data):
    """Get the value"""
    assert data['type'] == "timer"
    duration = data['duration']
    return duration['median']
class TimerMeanFormat(object):
  """A Yammer Metrics Timer datum (value = mean duration)."""

  def getCount(self, data):
    """Get the count"""
    assert data['type'] == "timer"
    rate = data['rate']
    return rate['count']

  def getValue(self, data):
    """Get the value"""
    assert data['type'] == "timer"
    duration = data['duration']
    return duration['mean']
class CounterFormat(object):
  """A Yammer Metrics Counter datum (count and value are the same field)."""

  def getCount(self, data):
    """Get the count"""
    assert data['type'] == "counter"
    return data['count']

  def getValue(self, data):
    """Get the value of a count (just the count)"""
    return self.getCount(data)
class MeterFormat(object):
  """A Yammer Metrics Meter datum (value = mean rate)."""

  def getCount(self, data):
    """Get the count"""
    assert data['type'] == "meter"
    count = data['count']
    return count

  def getValue(self, data):
    """Get the value"""
    assert data['type'] == "meter"
    mean = data['mean']
    return mean
class GaugeFormat(object):
  """A Yammer Metrics Gauge datum (no count accessor — gauges are point reads)."""

  def getValue(self, data):
    """Get the value"""
    assert data['type'] == 'gauge'
    value = data['value']
    return value
class DataFormats(object):
  """Different ways data can be formatted (shared, stateless singleton readers)."""

  DEFAULT = DefaultFormat()        # scales' own {'count', 'average'} dicts
  DIRECT = DirectFormat()          # the datum is the raw value
  TIMER = TimerFormat()            # Yammer Metrics timer, median duration
  TIMER_MEAN = TimerMeanFormat()   # Yammer Metrics timer, mean duration
  COUNTER = CounterFormat()        # Yammer Metrics counter
  METER = MeterFormat()            # Yammer Metrics meter
  GAUGE = GaugeFormat()            # Yammer Metrics gauge
class Aggregator(object):
  """Base class for stat aggregators.

  Subclasses must define DEFAULT_NAME (used when no name is given) and
  provide addValue(source, value) and result().
  """

  def __init__(self, name = None, dataFormat = DataFormats.DEFAULT):
    # `name or ...` means an empty-string name also falls back to the default.
    self.name = name or self.DEFAULT_NAME
    self._dataFormat = dataFormat

  def clone(self):
    """Creates a clone of this aggregator.

    NOTE(review): assumes the subclass constructor accepts
    (name=..., dataFormat=...); subclasses with other signatures
    (e.g. Sorted, Highlight) override clone().
    """
    return type(self)(name = self.name, dataFormat = self._dataFormat)
class Average(Aggregator):
  """Aggregate average values of a stat."""

  DEFAULT_NAME = "average"

  # Class-level defaults; `+=` rebinds them as instance attributes on first use.
  _count = 0
  _total = 0

  def addValue(self, _, value):
    """Adds a value from the given source. None values are ignored."""
    if value is not None:
      try:
        self._count += self._dataFormat.getCount(value)
        self._total += self._dataFormat.getValue(value) * self._dataFormat.getCount(value)
      except TypeError:
        # Fall back to treating the datum as a bare number when the data
        # format cannot read it (e.g. indexing into an int raises TypeError).
        self._count += 1
        self._total += value

  def result(self):
    """Formats the result."""
    return {
      "count": self._count,
      "total": self._total,
      "average": float(self._total) / self._count if self._count else 0
    }
class Sum(Aggregator):
  """Aggregate sum of a stat."""

  DEFAULT_NAME = "sum"

  # Class-level default; `+=` rebinds it as an instance attribute on first use.
  total = 0

  def addValue(self, _, value):
    """Adds a value from the given source.

    NOTE(review): unlike Average.addValue, None values are not skipped and
    there is no bare-number fallback — the data format is applied directly.
    """
    self.total += self._dataFormat.getValue(value)

  def result(self):
    """Formats the result."""
    return self.total
def _humanSortKey(s):
"""Sort strings with numbers in a way that makes sense to humans (e.g., 5 < 20)"""
if isinstance(s, str):
return [w.isdigit() and int(w) or w for w in re.split(r'(\d+)', s)]
else:
return s
class InverseMap(Aggregator):
  """Aggregates an inverse mapping: value -> list of sources reporting it."""

  DEFAULT_NAME = "inverse"

  def __init__(self, *args, **kw):
    # Accepts the base (name, dataFormat) arguments, so the inherited
    # clone() works unmodified.
    Aggregator.__init__(self, *args, **kw)
    self.__result = defaultdict(list)

  def addValue(self, source, data):
    """Adds a value from the given source."""
    self.__result[self._dataFormat.getValue(data)].append(source)

  def result(self):
    """Formats the result, sorting each source list human-numerically."""
    for value in six.itervalues(self.__result):
      value.sort(key = _humanSortKey)
    return self.__result
class Sorted(Aggregator):
  """Aggregate sorted version of a stat."""

  DEFAULT_NAME = "sorted"

  # pylint: disable=W0622
  def __init__(self, cmp=None, key=None, reverse=False, *args, **kw):
    Aggregator.__init__(self, *args, **kw)
    self.__result = []
    self.__cmp = cmp
    self.__key = key
    self.__reverse = reverse

  def addValue(self, source, data):
    """Adds a value from the given source."""
    self.__result.append((source, self._dataFormat.getValue(data)))

  def result(self):
    """Formats the result: the accumulated (source, value) pairs, sorted."""
    # list.sort() lost its `cmp` argument in Python 3; emulate the Python 2
    # behavior (which compared cmp(key(a), key(b))) via functools.cmp_to_key.
    if self.__cmp is None:
      sortKey = self.__key
    else:
      cmpKey = functools.cmp_to_key(self.__cmp)
      if self.__key is None:
        sortKey = cmpKey
      else:
        userKey = self.__key
        sortKey = lambda item: cmpKey(userKey(item))
    self.__result.sort(key = sortKey, reverse = self.__reverse)
    return self.__result

  def clone(self):
    """Creates a clone of this aggregator."""
    return type(self)(self.__cmp, self.__key, self.__reverse, name = self.name, dataFormat = self._dataFormat)
class Highlight(Aggregator):
  """Picks a single value across all sources and highlights it."""

  value = None
  source = None

  def __init__(self, name, fn, dataFormat = DataFormats.DEFAULT):
    """Creates a highlight aggregator - this will pick one of the values to highlight.

    Args:
      name: The name of this aggregator.
      fn: Callable that takes (a, b) and returns True if b should be selected as the highlight,
          where a is the previously chosen highlight.
      dataFormat: How to interpret incoming data points.
    """
    # Forward dataFormat to the base class (it was previously dropped,
    # leaving self._dataFormat stuck at the default).
    Aggregator.__init__(self, name, dataFormat)
    self.fn = fn

  def addValue(self, source, value):
    """Adds a value from the given source. The first value always wins."""
    if self.source is None or self.fn(self.value, value):
      self.value = value
      self.source = source

  def result(self):
    """Formats the result."""
    return {
      "source": self.source,
      "value": self.value
    }

  def clone(self):
    """Creates a clone of this aggregator (preserving its data format)."""
    return Highlight(self.name, self.fn, self._dataFormat)
class Aggregation(object):
  """Aggregates stat dictionaries."""

  def __init__(self, aggregators):
    """Creates a stat aggregation object from a hierarchical dict representation:

      agg = aggregation.Aggregation({
          'http_hits' : {
            '200': [aggregation.Sum(dataFormat=aggregation.DataFormats.DIRECT)],
            '404': [aggregation.Sum(dataFormat=aggregation.DataFormats.DIRECT)]
          }})

    Also supports regular expression in aggregations keys:

      agg = aggregation.Aggregation({
          'http_hits' : {
            ('ok', re.compile("[1-3][0-9][0-9]")): [aggregation.Sum(dataFormat=aggregation.DataFormats.DIRECT)],
            ('err', re.compile("[4-5][0-9][0-9]")): [aggregation.Sum(dataFormat=aggregation.DataFormats.DIRECT)]
          }})

    Inner nodes are dicts mirroring the data's structure; leaves are lists
    of Aggregator instances that serve as prototypes (they are cloned per
    result slot).
    """
    self._aggregators = aggregators
    self._result = {}

  def addSource(self, source, data):
    """Adds the given source's stats."""
    self._aggregate(source, self._aggregators, data, self._result)

  def addJsonDirectory(self, directory, test=None):
    """Adds data from json files in the given directory.

    The file's basename (without extension) becomes the source name.
    `test`, when given, is called as test(filename, fullPath) to filter files.
    """
    for filename in os.listdir(directory):
      try:
        fullPath = os.path.join(directory, filename)
        if not test or test(filename, fullPath):
          with open(fullPath) as f:
            jsonData = json.load(f)
            name, _ = os.path.splitext(filename)
            self.addSource(name, jsonData)
      except ValueError:
        # Skip files that aren't valid json.
        continue

  def _clone(self, aggregators):
    """Clones a list of aggregators."""
    return [x.clone() for x in aggregators]

  def _aggregate(self, source, aggregators, data, result):
    """Performs aggregation at a specific node in the data/aggregator tree."""
    if data is None:
      return
    if hasattr(aggregators, 'items'):
      # Keep walking the tree.
      for key, value in six.iteritems(aggregators):
        if isinstance(key, tuple):
          # (resultName, regex) key: every matching data key feeds the
          # aggregate stored under resultName.
          key, regex = key
          for dataKey, dataValue in six.iteritems(data):
            if regex.match(dataKey):
              result.setdefault(key, {})
              self._aggregate(source, value, dataValue, result[key])
        else:
          if key == '*':
            # Wildcard: descend into every data key, keeping each key's name.
            for dataKey, dataValue in six.iteritems(data):
              result.setdefault(dataKey, {})
              self._aggregate(source, value, dataValue, result[dataKey])
          elif key in data:
            result.setdefault(key, {})
            self._aggregate(source, value, data[key], result[key])
    else:
      # We found a leaf: a list of prototype aggregators. Clone each one
      # into this result slot on first use, then feed it the datum.
      for aggregator in aggregators:
        if aggregator.name not in result:
          result[aggregator.name] = aggregator.clone()
        result[aggregator.name].addValue(source, data)

  def result(self, root = None):
    """Formats the result, recursively replacing aggregators with their results."""
    root = root or self._result
    if isinstance(root, Aggregator):
      return root.result()
    else:
      result = {}
      for key, value in six.iteritems(root):
        if value:
          result[key] = self.result(value)
      return result
class FileInclusionTest(object):
  """Object to help create good file inclusion tests.

  Instances are callables usable as the `test` argument to
  Aggregation.addJsonDirectory: they reject files matched by ignoreByName
  and files whose mtime is older than maxAge.
  """

  def __init__(self, ignoreByName = None, maxAge = None):
    self.ignoreByName = ignoreByName  # optional callable(fullPath) -> bool
    self.maxAge = maxAge              # optional datetime.timedelta

  def __call__(self, _, fullPath):
    """Tests if a file should be included in the aggregation."""
    try:
      # Reject files the name filter matches (e.g. incoming/partial files).
      nameFilter = self.ignoreByName
      if nameFilter and nameFilter(fullPath):
        return False
      # Reject files that haven't been touched recently enough.
      if self.maxAge:
        modified = datetime.datetime.fromtimestamp(os.stat(fullPath).st_mtime)
        if datetime.datetime.now() - modified > self.maxAge:
          return False
      return True
    except: # pylint: disable=W0702
      # Deliberate best-effort: any failure (unreadable file, filter error)
      # simply excludes the file.
      return False
scales-1.0.9/src/greplin/scales/aggregation_test.py 0000664 0000000 0000000 00000003125 12474406655 0022412 0 ustar 00root root 0000000 0000000 # Copyright 2011 The scales Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Stat aggregation tests."""
import re
from greplin.scales import aggregation
import unittest
class AggregationTest(unittest.TestCase):
  """Test cases for stat aggregation classes."""

  def testNoData(self):
    "This used to infinite loop."
    agg = aggregation.Aggregation({
      'a': {
        '*': [aggregation.Sum()]
      }
    })
    agg.addSource('source1', {'a': {}})
    agg.result()

  def testRegex(self):
    "Test regexes in aggregation keys"
    agg = aggregation.Aggregation({
      'a' : {
        ('success', re.compile("[1-3][0-9][0-9]")): [aggregation.Sum(dataFormat = aggregation.DataFormats.DIRECT)],
        ('error', re.compile("[4-5][0-9][0-9]")): [aggregation.Sum(dataFormat = aggregation.DataFormats.DIRECT)]
      }})
    agg.addSource('source1', {'a': {'200': 10, '302': 10, '404': 1, '500': 3}})
    result = agg.result()
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual.
    self.assertEqual(result['a']['success']['sum'], 20)
    self.assertEqual(result['a']['error']['sum'], 4)
if __name__ == '__main__':
  # Allow running this test module directly.
  unittest.main()
scales-1.0.9/src/greplin/scales/bottlehandler.py 0000664 0000000 0000000 00000003073 12474406655 0021715 0 ustar 00root root 0000000 0000000
from six import StringIO
from greplin import scales
from greplin.scales import formats, util
from bottle import abort, request, response, run, Bottle
import functools
def bottlestats(server_name, path=''):
  """Renders a GET request, by showing this nodes stats and children.

  Emits json, pretty-printed json, or html depending on the 'format'
  query parameter; 404s when the path doesn't resolve to a stat node.
  """
  stripped = path.lstrip('/')
  segments = stripped.split('/')
  if segments and not segments[0]:
    # An empty path yields [''] from split; treat it as the root.
    segments = segments[1:]
  stat_dict = util.lookup(scales.getStats(), segments)
  if stat_dict is None:
    abort(404, "Not Found")
    return

  buf = StringIO()
  fmt = request.query.get('format', 'html')
  query = request.query.get('query', None)
  if fmt == 'json':
    response.content_type = "application/json"
    formats.jsonFormat(buf, stat_dict, query)
  elif fmt == 'prettyjson':
    formats.jsonFormat(buf, stat_dict, query, pretty=True)
    response.content_type = "application/json"
  else:
    formats.htmlHeader(buf, '/' + stripped, server_name, query)
    formats.htmlFormat(buf, tuple(segments), stat_dict, query)
    response.content_type = "text/html"
  return buf.getvalue()
def register_stats_handler(app, server_name, prefix='/status/'):
"""Register the stats handler with a Flask app, serving routes
with a given prefix. The prefix defaults to '/_stats/', which is
generally what you want."""
if not prefix.endswith('/'):
prefix += '/'
handler = functools.partial(bottlestats, server_name)
app.get(prefix, callback=handler)
app.get(prefix + 'Stats
')
output.write('%s
' % serverName)
output.write(
'