pax_global_header 0000666 0000000 0000000 00000000064 13531322035 0014507 g ustar 00root root 0000000 0000000 52 comment=7360389a2f5a98dd8868d26aa10e5454ec8ad5b5
stream-lib-2.9.8/ 0000775 0000000 0000000 00000000000 13531322035 0013566 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/.gitignore 0000664 0000000 0000000 00000000171 13531322035 0015555 0 ustar 00root root 0000000 0000000 target/
.classpath
.project
.settings/
.idea/
pom.xml.versionsBackup
.#*
target/
pom.xml.releaseBackup
release.properties stream-lib-2.9.8/.travis.yml 0000664 0000000 0000000 00000000043 13531322035 0015674 0 ustar 00root root 0000000 0000000 language: java
jdk:
- oraclejdk8
stream-lib-2.9.8/3rd_party_licenses.txt 0000664 0000000 0000000 00000011372 13531322035 0020127 0 ustar 00root root 0000000 0000000
LICENSES FOR THIRD-PARTY COMPONENTS
===============================================================================
The following sections list licensing information for
libraries included with the stream-lib source and components
used to test stream-lib.
The following software may be included in this product:
===============================================================================
Fastutil » 8.1.1
Fastutil » 8.1.1 uses the Apache 2.0 license, shown below. See the License for details about distribution rights, and the specific rights regarding derivative works.
http://www.apache.org/licenses/LICENSE-2.0.txt
---------------------------------------------------------------------------
JUnit
JUnit » 4.12 uses Eclipse Public License - Version 1.0, shown below. See the License for details about distribution rights, and the specific rights regarding derivative works.
http://www.eclipse.org/org/documents/epl-v10.php
---------------------------------------------------------------------------
SLF4J Simple Binding
SLF4J API Module
Copyright (c) 2004-2007 QOS.ch
SLF4J Simple Binding 1.7.25 and SLF4J API Module 1.7.25 use the MIT license, shown below. See the License for details about distribution rights, and the specific rights regarding derivative works.
https://opensource.org/licenses/mit-license.php
---------------------------------------------------------------------------
Colt » 1.2.0
Packages cern.colt* , cern.jet*, cern.clhep
Copyright (c) 1999 CERN - European Organization for Nuclear Research.
Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose is hereby granted without fee, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation. CERN makes no representations about the suitability of this software for any purpose. It is provided "as is" without expressed or implied warranty.
Packages hep.aida.*
Written by Pavel Binko, Dino Ferrero Merlino, Wolfgang Hoschek, Tony Johnson, Andreas Pfeiffer, and others. Check the FreeHEP home page for more info. Permission to use and/or redistribute this work is granted under the terms of the LGPL License, with the exception that any usage related to military applications is expressly forbidden. The software and documentation made available under the terms of this license are provided with no warranty.
---------------------------------------------------------------------------
Charts4j » 1.3
https://github.com/julienchastang/charts4j/blob/master/LICENSE.txt
/**
*
* The MIT License
*
* Copyright (c) 2011 the original author or authors.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
---------------------------------------------------------------------------
Apache Commons Codec » 1.11
Apache Commons Codec » 1.11 uses the Apache 2.0 license, shown below. See the License for details about distribution rights, and the specific rights regarding derivative works.
https://www.apache.org/licenses/LICENSE-2.0.txt
---------------------------------------------------------------------------
Guava: Google Core Libraries For Java
Guava: Google Core Libraries For Java » 24.1-jre uses the Apache 2.0 license, shown below. See the License for details about distribution rights, and the specific rights regarding derivative works.
http://www.apache.org/licenses/LICENSE-2.0.txt
---------------------------------------------------------------------------
Mahout Math » 0.13.0
Mahout Math » 0.13.0 uses the Apache 2.0 license, shown below. See the License for details about distribution rights, and the specific rights regarding derivative works.
https://github.com/apache/mahout/blob/master/LICENSE.txt stream-lib-2.9.8/CHANGES.txt 0000664 0000000 0000000 00000000033 13531322035 0015373 0 ustar 00root root 0000000 0000000 2.0.0:
* Initial Release.
stream-lib-2.9.8/LICENSE.txt 0000664 0000000 0000000 00000026133 13531322035 0015416 0 ustar 00root root 0000000 0000000
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2011 Clearspring Technologies
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
stream-lib-2.9.8/NOTICE.txt 0000664 0000000 0000000 00000000474 13531322035 0015315 0 ustar 00root root 0000000 0000000 stream-lib
Copyright 2016 AddThis
This product includes software developed by AddThis.
This product also includes code adapted from:
Apache Solr (http://lucene.apache.org/solr/)
Copyright 2014 The Apache Software Foundation
Apache Mahout (http://mahout.apache.org/)
Copyright 2014 The Apache Software Foundation
stream-lib-2.9.8/README.mdown 0000664 0000000 0000000 00000012070 13531322035 0015571 0 ustar 00root root 0000000 0000000 [](https://travis-ci.org/addthis/stream-lib)
## Description
A Java library for summarizing data in streams for which it is
infeasible to store all events. More specifically, there are classes
for estimating: cardinality (i.e. counting things); set membership;
top-k elements and frequency. One particularly useful feature is that
cardinality estimators with compatible configurations may be safely
merged.
These classes may be used directly in a JVM project or with the
provided shell scripts and good old Unix IO redirection.
The ideas here are not original to us. We have endeavored to create
useful implementations from iterating over the existing academic
literature. As such this library relies heavily on the work of
others. Please read the [Sources](#Sources) and
[Reference](#References) sections.
## Examples
$ echo -e "foo\nfoo\nbar" | ./bin/topk
item count error
---- ----- -----
foo 2 0
bar 1 0
Item count: 3
$ echo -e "foo\nfoo\nbar" | ./bin/cardinality
Item Count Cardinality Estimate
---------- --------------------
3 2
## Maven Artifact [](https://maven-badges.herokuapp.com/maven-central/com.clearspring.analytics/stream)
``` xml
com.clearspring.analytics
stream
2.9.5
```
## Building
Assuming you have [Apache Maven](http://maven.apache.org/) installed
and configured:
mvn package
And you should be all set.
## Where People Hang Out
Mailing list: http://groups.google.com/group/stream-lib-user
## Sources
The set membership code is the Bloom Filter implementation from Apache
Cassandra circa December 2009. The changes here are minimal and were
for the purpose of testing and independent use. Apache Software
Foundation headers have been retained on these files. By extension we
also include [murmurhash](http://murmurhash.googlepages.com/).
We were inspired to use this code by Jonathan Ellis' post
[All you ever wanted to know about writing bloom filters](http://spyced.blogspot.com/2009/01/all-you-ever-wanted-to-know-about.html).
## References
There are javadoc references to specific papers. These were the ones
we found most relevant during our research.
#### Cardinality
* Min Cai, Jianping Pan, Yu K. Kwok, and Kai Hwang. Fast and accurate
traffic matrix measurement using adaptive cardinality counting. In
MineNet ’05: Proceedings of the 2005 ACM SIGCOMM workshop on
Mining network data, pages 205–206, New York, NY, USA, 2005. ACM.
* Ahmed Metwally, Divyakant Agrawal, and Amr E. Abbadi. Why go
logarithmic if we can go linear?: Towards effective distinct counting of
search traffic. In EDBT ’08: Proceedings of the 11th international
conference on Extending database technology, pages 618–629, New York,
NY, USA, 2008. ACM.
* Nikos Ntarmos, Peter Triantafillou, and Gerhard Weikum. Counting at
large: Efficient cardinality estimation in Internet-Scale data networks.
In ICDE ’06: Proceedings of the 22nd International Conference on Data
Engineering, pages 40+, Washington, DC, USA, 2006. IEEE Computer
Society.
* Marianne Durand and Philippe Flajolet. LogLog counting of large
cardinalities. In ESA03, volume 2832 of LNCS, pages 605–617, 2003.
* Kyu Y. Whang, Brad T. Vander Zanden, and Howard M. Taylor. A
linear-time probabilistic counting algorithm for database applications.
ACM Trans. Database Syst., 15(2):208–229, 1990.
* Moses Charikar, Kevin Chen, and Martin F. Colton. Finding frequent
items in data streams. In ICALP ’02: Proceedings of the 29th
International Colloquium on Automata, Languages and Programming,
pages 693–703, London, UK, 2002. Springer-Verlag.
* Stefan Heule, Marc Nunkesser, Alex Hall. HyperLogLog in Practice:
Algorithmic Engineering of a State of The Art Cardinality Estimation
Algorithm. Proceedings of the EDBT 2013 Conference, ACM, Genoa, Italy
#### Top-K
* Graham Cormode and S. Muthukrishnan. An improved data stream
summary: The Count-Min sketch and its applications. pages 29–38.
2004. 10.1016/j.jalgor.2003.12.001
http://dl.acm.org/citation.cfm?id=1073718
* Cheqing Jin, Weining Qian, Chaofeng Sha, Jeffrey X. Yu, and Aoying
Zhou. Dynamically maintaining frequent items over a data stream. In
CIKM ’03: Proceedings of the twelfth international conference on
Information and knowledge management, pages 287–294, New York,
NY, USA, 2003. ACM. 10.1145/956863.956918
http://dl.acm.org/citation.cfm?id=956918
* Ahmed Metwally, Divyakant Agrawal, and Amr Abbadi. Efficient
computation of frequent and top-k elements in data streams. pages
398–412. 2005. 10.1007/978-3-540-30570-5_27
http://link.springer.com/chapter/10.1007/978-3-540-30570-5_27
#### Frequency
* Graham Cormode and S. Muthukrishnan. An improved data stream
summary: The Count-Min sketch and its applications. 2004. 10.1016/j.jalgor.2003.12.001
http://dl.acm.org/citation.cfm?id=1073718
stream-lib-2.9.8/bin/ 0000775 0000000 0000000 00000000000 13531322035 0014336 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/bin/card-test-and-graph.sh 0000775 0000000 0000000 00000000363 13531322035 0020424 0 ustar 00root root 0000000 0000000 #!/bin/bash -x
# Wrap the maven boilerplate to run the cardinality tests and graph results.
mvn -e exec:java -Dexec.classpathScope="test" -Dexec.mainClass="com.clearspring.analytics.stream.cardinality.TestAndGraphResults" -Dexec.args="$*"
stream-lib-2.9.8/bin/cardinality 0000775 0000000 0000000 00000002275 13531322035 0016575 0 ustar 00root root 0000000 0000000 #!/bin/bash
# Directory searched for the stream-lib jar. A STREAM_LIB_DIR value already
# present in the environment wins; otherwise fall back to ./target.
STREAM_LIB_DIR=${STREAM_LIB_DIR:-./target}
# Set to 1 once a help flag has been seen on the command line.
HELP=0
# Print the usage text for this script on stdout.
# The heredoc delimiter is unquoted, so $( basename $0 ) and ${STREAM_LIB_DIR}
# are expanded at print time; "<<-" strips leading tab characters from each
# heredoc line.
function help () {
cat <<-HELP
cardinality -- Cardinality estimation command line utility.
Usage: $( basename $0 ) [options] [RATE]
Calculates the cardinality of a stream, reporting a summary at the end.
cardinality looks for the Stream Summary analytics library at the location
of the environment variable STREAM_LIB_DIR (default is ./target).
Arguments:
RATE Report interim summary every RATE elements.
Options:
-h Displays this help.
Stream Lib Dir: ${STREAM_LIB_DIR}
HELP
}
# Abort the script with exit status 1.
#   $1  message printed after the "PREDICTABLE FAILURE. " prefix
#   $2  optional; any non-empty value also prints the usage text via help
fail() {
    printf '%s\n' "PREDICTABLE FAILURE. $1"
    if [[ -n "$2" ]]; then
        help
    fi
    exit 1
}
# Running count of positional parameters consumed by option parsing.
SHIFT=0
# Add $1 (default 1) to SHIFT.
incshift() {
    local step=${1:-1}
    SHIFT=$(( SHIFT + step ))
}
# getopts only understands single-letter options, so look for the long help
# spellings (-help, --help and their prefixes) first.
# "$@" keeps every argument intact; the previous unquoted $* re-split
# arguments on whitespace and expanded glob characters in them.
for opt in "$@"; do
    case "$opt" in
        -h | -he | -hel | -help | --h | --he | --hel | --help )
            HELP=1 ;;
    esac
done

# Short options. Each recognised option bumps SHIFT so that it can be dropped
# from the positional parameters below.
while getopts "h" opt; do
    case $opt in
        h ) HELP=1; incshift ;;
        # Template for an option that takes a value:
        # $opt ) B=$OPTARG; incshift 2 ;;
    esac
done
shift "$SHIFT"

if test "$HELP" = 1; then
    help
    exit 0
fi

# The jar glob is intentionally left outside the quotes so the shell expands
# it; the remaining arguments are forwarded to the Java entry point unchanged.
java -cp "${STREAM_LIB_DIR}"/stream-*SNAPSHOT.jar com.clearspring.analytics.util.ObyCount "$@"
stream-lib-2.9.8/bin/topk 0000775 0000000 0000000 00000002341 13531322035 0015241 0 ustar 00root root 0000000 0000000 #!/bin/bash
# Directory searched for the stream-lib jar. A STREAM_LIB_DIR value already
# present in the environment wins; otherwise fall back to ./target.
STREAM_LIB_DIR=${STREAM_LIB_DIR:-./target}
# Set to 1 once a help flag has been seen on the command line.
HELP=0
# Print the usage text for this script on stdout.
# The heredoc delimiter is unquoted, so $( basename $0 ) and ${STREAM_LIB_DIR}
# are expanded at print time; "<<-" strips leading tab characters from each
# heredoc line.
function help () {
cat <<-HELP
topk -- Finds the top elements in a stream.
Usage: $( basename $0 ) [options] [CAPACITY] [RATE]
Finds the top elements in a stream, reporting a summary at the end.
topk looks for the Stream Summary analytics library at the location
of the environment variable STREAM_LIB_DIR (default is ./target).
Arguments:
CAPACITY Size of top / k (defaults to 1000)
RATE Report interim summary every RATE elements.
Options:
-h Displays this help.
Stream Lib Dir: ${STREAM_LIB_DIR}
HELP
}
# Abort the script with exit status 1.
#   $1  message printed after the "PREDICTABLE FAILURE. " prefix
#   $2  optional; any non-empty value also prints the usage text via help
fail() {
    printf '%s\n' "PREDICTABLE FAILURE. $1"
    if [[ -n "$2" ]]; then
        help
    fi
    exit 1
}
# Running count of positional parameters consumed by option parsing.
SHIFT=0
# Add $1 (default 1) to SHIFT.
incshift() {
    local step=${1:-1}
    SHIFT=$(( SHIFT + step ))
}
# getopts only understands single-letter options, so look for the long help
# spellings (-help, --help and their prefixes) first.
# "$@" keeps every argument intact; the previous unquoted $* re-split
# arguments on whitespace and expanded glob characters in them.
for opt in "$@"; do
    case "$opt" in
        -h | -he | -hel | -help | --h | --he | --hel | --help )
            HELP=1 ;;
    esac
done

# Short options. Each recognised option bumps SHIFT so that it can be dropped
# from the positional parameters below.
while getopts "h" opt; do
    case $opt in
        h ) HELP=1; incshift ;;
        # Template for an option that takes a value:
        # $opt ) B=$OPTARG; incshift 2 ;;
    esac
done
shift "$SHIFT"

if test "$HELP" = 1; then
    help
    exit 0
fi

# The jar glob is intentionally left outside the quotes so the shell expands
# it; the remaining arguments are forwarded to the Java entry point unchanged.
java -cp "${STREAM_LIB_DIR}"/stream-*SNAPSHOT.jar com.clearspring.analytics.util.TopK "$@"
stream-lib-2.9.8/pom.xml 0000664 0000000 0000000 00000006343 13531322035 0015111 0 ustar 00root root 0000000 0000000
jar-pom
com.addthis.common.build.maven.pom
3.8.0
4.0.0
com.clearspring.analytics
stream
jar
2.9.8
stream-lib
A library for summarizing data in streams for which it is infeasible to store all events
https://github.com/addthis/stream-lib
Apache License, Version 2.0
http://www.apache.org/licenses/LICENSE-2.0.txt
1.8
com.google.guava
guava
23.3-jre
it.unimi.dsi
fastutil
8.1.1
it.unimi.dsi
fastutil
junit
junit
4.12
test
org.slf4j
slf4j-simple
1.7.25
test
colt
colt
1.2.0
test
com.googlecode.charts4j
charts4j
1.3
test
commons-codec
commons-codec
1.11
test
com.google.guava
guava
test
org.apache.mahout
mahout-math
0.13.0
test
Matt Abrams
abramsm@addthis.com
Chris Burroughs
christopher@addthis.com
scm:git:git@github.com:addthis/stream-lib.git
scm:git:git@github.com:addthis/stream-lib.git
https://github.com/addthis/stream-lib
v2.9.8
stream-lib-2.9.8/src/ 0000775 0000000 0000000 00000000000 13531322035 0014355 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/ 0000775 0000000 0000000 00000000000 13531322035 0015301 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/ 0000775 0000000 0000000 00000000000 13531322035 0016222 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/ 0000775 0000000 0000000 00000000000 13531322035 0017000 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/ 0000775 0000000 0000000 00000000000 13531322035 0021311 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/analytics/ 0000775 0000000 0000000 00000000000 13531322035 0023300 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/analytics/hash/ 0000775 0000000 0000000 00000000000 13531322035 0024223 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/analytics/hash/Lookup3Hash.java 0000664 0000000 0000000 00000027214 13531322035 0027234 0 ustar 00root root 0000000 0000000 package com.clearspring.analytics.hash;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*
Fast, well distributed, cross-platform hash functions.
*
*
* Development background: I was surprised to discover that there isn't a good cross-platform hash function defined for strings. MD5, SHA, FNV, etc., all define hash functions over bytes, meaning that the hash of a string is under-specified.
*
*
* So I set out to create a standard 32 bit string hash that would be well defined for implementation in all languages, have very high performance, and have very good hash properties such as distribution. After evaluating all the options, I settled on using Bob Jenkins' lookup3 as a base. It's a well studied and very fast hash function, and the hashword variant can work with 32 bits at a time (perfect for hashing unicode code points). It's also even faster on the latest JVMs which can translate pairs of shifts into native rotate instructions.
*
* The only problem with using lookup3 hashword is that it includes a length in the initial value. This would suck some performance out since directly hashing a UTF8 or UTF16 string (Java) would require a pre-scan to get the actual number of unicode code points. The solution was to simply remove the length factor, which is equivalent to biasing initVal by -(numCodePoints*4). This slightly modified lookup3 I define as lookup3ycs.
*
* So the definition of the cross-platform string hash lookup3ycs is as follows:
*
* The hash value of a character sequence (a string) is defined to be the hash of its unicode code points, according to lookup3 hashword, with the initval biased by -(length*4).
*
* So by definition
*
*
* lookup3ycs(k,offset,length,initval) == lookup3(k,offset,length,initval-(length*4))
*
* AND
*
* lookup3ycs(k,offset,length,initval+(length*4)) == lookup3(k,offset,length,initval)
*
* An obvious advantage of this relationship is that you can use lookup3 if you don't have an implementation of lookup3ycs.
*
*
* @author yonik
*/
public class Lookup3Hash {
/**
 * Java port of {@code hashword()} from Bob Jenkins' lookup3.c: hashes a run
 * of 32-bit words into a single 32-bit value.
 *
 * <p>Words are consumed three at a time and stirred into a three-word state;
 * the last one to three words are added and then scrambled by a closing
 * avalanche step. When {@code length} is less than one, no word is read and
 * the initial state {@code 0xdeadbeef + (length << 2) + initval} is returned
 * as is.
 *
 * @param k the key to hash
 * @param offset offset of the start of the key
 * @param length length of the key, counted in {@code int} words
 * @param initval initial value to fold into the hash
 * @return the 32 bit hash code
 */
public static int lookup3(int[] k, int offset, int length, int initval) {
    final int seed = 0xdeadbeef + (length << 2) + initval;
    int a = seed;
    int b = seed;
    int c = seed;

    int pos = offset;
    int remaining = length;

    // Every full triple except the last one goes through mix(a,b,c).
    for (; remaining > 3; remaining -= 3, pos += 3) {
        a += k[pos];
        b += k[pos + 1];
        c += k[pos + 2];

        a -= c;
        a ^= Integer.rotateLeft(c, 4);
        c += b;
        b -= a;
        b ^= Integer.rotateLeft(a, 6);
        a += c;
        c -= b;
        c ^= Integer.rotateLeft(b, 8);
        b += a;
        a -= c;
        a ^= Integer.rotateLeft(c, 16);
        c += b;
        b -= a;
        b ^= Integer.rotateLeft(a, 19);
        a += c;
        c -= b;
        c ^= Integer.rotateLeft(b, 4);
        b += a;
    }

    // Nothing left to add: the state is returned without the closing step.
    if (remaining < 1) {
        return c;
    }

    // Add the trailing 1..3 words, highest index first.
    if (remaining == 3) {
        c += k[pos + 2];
    }
    if (remaining >= 2) {
        b += k[pos + 1];
    }
    a += k[pos];

    // final(a,b,c)
    c ^= b;
    c -= Integer.rotateLeft(b, 14);
    a ^= c;
    a -= Integer.rotateLeft(c, 11);
    b ^= a;
    b -= Integer.rotateLeft(a, 25);
    c ^= b;
    c -= Integer.rotateLeft(b, 16);
    a ^= c;
    a -= Integer.rotateLeft(c, 4);
    b ^= a;
    b -= Integer.rotateLeft(a, 14);
    c ^= b;
    c -= Integer.rotateLeft(b, 24);
    return c;
}
/**
 * Variant of {@code lookup3} that leaves the key length out of the initial
 * state: {@code initval} is lowered by {@code length << 2} before delegating,
 * which cancels the {@code length << 2} term that lookup3 adds.
 *
 * <p>Consequently
 * {@code lookup3ycs(k, offset, length, initval) == lookup3(k, offset, length, initval - (length << 2))}
 * and
 * {@code lookup3ycs(k, offset, length, initval + (length << 2)) == lookup3(k, offset, length, initval)}.
 */
public static int lookup3ycs(int[] k, int offset, int length, int initval) {
    final int lengthBias = length << 2;
    return lookup3(k, offset, length, initval - lengthBias);
}
/**
 * Hashes the chars of {@code s} in the range {@code [start, end)}. The hash
 * of a character sequence is defined to be the hash of its Unicode code
 * points according to {@link #lookup3ycs(int[], int, int, int)}.
 *
 * <p>A high surrogate that is followed by another char inside the range is
 * combined with that char through {@link Character#toCodePoint(char, char)};
 * the following char is not checked to be a low surrogate. A high surrogate
 * in the last position of the range is added as a plain char value.
 *
 * <p>If you know the number of code points in the {@code CharSequence}, you
 * can generate the same hash as the original lookup3
 * via {@code lookup3ycs(s, start, end, initval+(numCodePoints<<2))}.
 */
public static int lookup3ycs(CharSequence s, int start, int end, int initval) {
    // Unlike lookup3 there is no "+ (length << 2)" term here: the number of
    // code points is unknown up front and pre-scanning the sequence just to
    // count them is not worth it.
    final int seed = 0xdeadbeef + initval;
    int a = seed;
    int b = seed;
    int c = seed;

    int pos = start;
    // True while values have been added to the state since the last mix round.
    boolean pendingFinal = false;
    while (pos < end) {
        pendingFinal = true;

        char unit = s.charAt(pos++);
        if (Character.isHighSurrogate(unit) && pos < end) {
            a += Character.toCodePoint(unit, s.charAt(pos++));
        } else {
            a += unit;
        }
        if (pos >= end) {
            break;
        }

        unit = s.charAt(pos++);
        if (Character.isHighSurrogate(unit) && pos < end) {
            b += Character.toCodePoint(unit, s.charAt(pos++));
        } else {
            b += unit;
        }
        if (pos >= end) {
            break;
        }

        unit = s.charAt(pos++);
        if (Character.isHighSurrogate(unit) && pos < end) {
            c += Character.toCodePoint(unit, s.charAt(pos++));
        } else {
            c += unit;
        }
        if (pos >= end) {
            break;
        }

        // mix(a,b,c): only reached when more input follows this triple.
        a -= c;
        a ^= Integer.rotateLeft(c, 4);
        c += b;
        b -= a;
        b ^= Integer.rotateLeft(a, 6);
        a += c;
        c -= b;
        c ^= Integer.rotateLeft(b, 8);
        b += a;
        a -= c;
        a ^= Integer.rotateLeft(c, 16);
        c += b;
        b -= a;
        b ^= Integer.rotateLeft(a, 19);
        a += c;
        c -= b;
        c ^= Integer.rotateLeft(b, 4);
        b += a;

        pendingFinal = false;
    }

    if (pendingFinal) {
        // final(a,b,c)
        c ^= b;
        c -= Integer.rotateLeft(b, 14);
        a ^= c;
        a -= Integer.rotateLeft(c, 11);
        b ^= a;
        b -= Integer.rotateLeft(a, 25);
        c ^= b;
        c -= Integer.rotateLeft(b, 16);
        a ^= c;
        a -= Integer.rotateLeft(c, 4);
        b ^= a;
        b -= Integer.rotateLeft(a, 14);
        c ^= b;
        c -= Integer.rotateLeft(b, 24);
    }
    return c;
}
/**
 * 64-bit variant of lookup3ycs, corresponding to Bob Jenkins' lookup3
 * hashlittle2 with initval biased by {@code -(numCodePoints << 2)}. When the
 * high 32 bits of {@code initval} are zero, the low 32 bits of the result are
 * the same as the value computed by the 32-bit lookup3ycs.
 *
 * A high surrogate that is followed by another char inside the region is
 * combined with that char through {@link Character#toCodePoint(char, char)};
 * every other char is used as-is.
 *
 * @param s       character sequence to hash
 * @param start   index of the first char to hash (inclusive)
 * @param end     index after the last char to hash (exclusive)
 * @param initval seed; the low 32 bits seed all three state words, the high
 *                32 bits are additionally added to the third one
 * @return 64-bit hash built from state word {@code c} (low half, added as a
 *         signed int) and state word {@code b} (shifted into the high half)
 */
public static long lookup3ycs64(CharSequence s, int start, int end, long initval) {
    // Unlike lookup3 there is no "+ (length << 2)" term here: the number of code
    // points is unknown up front and the sequence is not pre-scanned.
    int a = 0xdeadbeef + (int) initval;
    int b = a;
    int c = a + (int) (initval >>> 32);

    int pos = start;
    // True while some input has been added since the last mix round,
    // i.e. the final() round still has to run.
    boolean pendingFinal = false;

    while (pos < end) {
        pendingFinal = true;

        char unit = s.charAt(pos++);
        if (Character.isHighSurrogate(unit) && pos < end) {
            a += Character.toCodePoint(unit, s.charAt(pos++));
        } else {
            a += unit;
        }
        if (pos >= end) {
            break;
        }

        unit = s.charAt(pos++);
        if (Character.isHighSurrogate(unit) && pos < end) {
            b += Character.toCodePoint(unit, s.charAt(pos++));
        } else {
            b += unit;
        }
        if (pos >= end) {
            break;
        }

        unit = s.charAt(pos++);
        if (Character.isHighSurrogate(unit) && pos < end) {
            c += Character.toCodePoint(unit, s.charAt(pos++));
        } else {
            c += unit;
        }
        if (pos >= end) {
            // exactly three code points consumed and nothing left: skip mix, run final
            break;
        }

        // mix(a, b, c)
        a -= c;
        a ^= Integer.rotateLeft(c, 4);
        c += b;

        b -= a;
        b ^= Integer.rotateLeft(a, 6);
        a += c;

        c -= b;
        c ^= Integer.rotateLeft(b, 8);
        b += a;

        a -= c;
        a ^= Integer.rotateLeft(c, 16);
        c += b;

        b -= a;
        b ^= Integer.rotateLeft(a, 19);
        a += c;

        c -= b;
        c ^= Integer.rotateLeft(b, 4);
        b += a;

        pendingFinal = false;
    }

    if (pendingFinal) {
        // final(a, b, c)
        c ^= b;
        c -= Integer.rotateLeft(b, 14);
        a ^= c;
        a -= Integer.rotateLeft(c, 11);
        b ^= a;
        b -= Integer.rotateLeft(a, 25);
        c ^= b;
        c -= Integer.rotateLeft(b, 16);
        a ^= c;
        a -= Integer.rotateLeft(c, 4);
        b ^= a;
        b -= Integer.rotateLeft(a, 14);
        c ^= b;
        c -= Integer.rotateLeft(b, 24);
    }

    final long highHalf = ((long) b) << 32;
    return c + highHalf;
}
/**
 * Hashes an entire character sequence with the 64-bit lookup3ycs variant,
 * using {@code -1} as the initial value.
 *
 * @param s character sequence to hash
 * @return 64-bit hash of all chars of {@code s}
 */
public static long lookup3ycs64(CharSequence s) {
    final int end = s.length();
    return lookup3ycs64(s, 0, end, -1L);
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/hash/MurmurHash.java 0000664 0000000 0000000 00000014217 13531322035 0027166 0 ustar 00root root 0000000 0000000 package com.clearspring.analytics.hash;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
/**
 * Fast, non-cryptographic hash functions suitable for general hash-based
 * lookup. See http://murmurhash.googlepages.com/ for more details.
 *
 * The 32-bit routine is the C version of MurmurHash 2.0 found at that site,
 * ported to Java by Andrzej Bialecki (ab at getopt org). The 64-bit routine was
 * copied from https://github.com/tnm/murmurhash-java.
 */
public class MurmurHash {

    /**
     * Hashes an arbitrary object to 32 bits.
     *
     * {@code null} hashes to 0. {@code Long}, {@code Integer}, {@code Double}
     * (raw long bits) and {@code Float} (raw int bits) go through
     * {@link #hashLong(long)}. A {@code String} is hashed over
     * {@code String.getBytes()}, i.e. using the platform default charset, and a
     * {@code byte[]} is hashed directly. Anything else is hashed via its
     * {@code toString()} value.
     *
     * @param o object to hash, may be {@code null}
     * @return 32-bit hash
     */
    public static int hash(Object o) {
        if (o == null) {
            return 0;
        }
        if (o instanceof Long) {
            long value = (Long) o;
            return hashLong(value);
        }
        if (o instanceof Integer) {
            int value = (Integer) o;
            return hashLong(value);
        }
        if (o instanceof Double) {
            long bits = Double.doubleToRawLongBits((Double) o);
            return hashLong(bits);
        }
        if (o instanceof Float) {
            int bits = Float.floatToRawIntBits((Float) o);
            return hashLong(bits);
        }
        if (o instanceof String) {
            byte[] encoded = ((String) o).getBytes();
            return hash(encoded);
        }
        if (o instanceof byte[]) {
            return hash((byte[]) o);
        }
        // Fall back to the textual form; re-enters this method with a String (or null).
        return hash((Object) o.toString());
    }

    /**
     * Hashes a whole byte array with seed {@code -1}.
     *
     * @param data bytes to hash
     * @return 32-bit hash
     */
    public static int hash(byte[] data) {
        return hash(data, data.length, -1);
    }

    /**
     * Hashes a whole byte array with the given seed.
     *
     * @param data bytes to hash
     * @param seed initial seed value
     * @return 32-bit hash
     */
    public static int hash(byte[] data, int seed) {
        return hash(data, data.length, seed);
    }

    /**
     * Hashes the first {@code length} bytes of {@code data}.
     *
     * @param data   bytes to hash
     * @param length number of leading bytes to include
     * @param seed   initial seed value
     * @return 32-bit hash
     */
    public static int hash(byte[] data, int length, int seed) {
        final int m = 0x5bd1e995;
        final int r = 24;

        int h = seed ^ length;

        // Body: consume the input four bytes at a time, little-endian.
        final int fullWords = length >> 2;
        for (int word = 0; word < fullWords; word++) {
            final int base = word << 2;
            int k = (data[base + 3] << 24)
                    | ((data[base + 2] & 0xff) << 16)
                    | ((data[base + 1] & 0xff) << 8)
                    | (data[base] & 0xff);

            k *= m;
            k ^= k >>> r;
            k *= m;

            h *= m;
            h ^= k;
        }

        // Tail: the 1-3 bytes left over after the last full word.
        // NOTE(review): tail bytes are XORed in sign-extended (no 0xff mask), unlike
        // the bytes in the main loop; kept exactly as-is so hash values do not change.
        final int remaining = length - (fullWords << 2);
        switch (remaining) {
            case 3:
                h ^= data[length - 3] << 16;
                // fall through
            case 2:
                h ^= data[length - 2] << 8;
                // fall through
            case 1:
                h ^= data[length - 1];
                h *= m;
                break;
            default:
                break;
        }

        // Final avalanche.
        h ^= h >>> 13;
        h *= m;
        h ^= h >>> 15;
        return h;
    }

    /**
     * Hashes a single 64-bit value to 32 bits by mixing its low and high
     * 32-bit halves in turn.
     *
     * @param data value to hash
     * @return 32-bit hash
     */
    public static int hashLong(long data) {
        final int m = 0x5bd1e995;
        final int r = 24;

        int low = (int) data * m;
        low ^= low >>> r;
        // The running hash starts at zero, so XOR-ing in the first word is a plain assignment.
        int h = low * m;

        int high = (int) (data >> 32) * m;
        high ^= high >>> r;
        h *= m;
        h ^= high * m;

        h ^= h >>> 13;
        h *= m;
        h ^= h >>> 15;
        return h;
    }

    /**
     * Hashes an arbitrary object to 64 bits.
     *
     * {@code null} hashes to 0. A {@code String} is hashed over
     * {@code String.getBytes()} (platform default charset), a {@code byte[]} is
     * hashed directly, and anything else is hashed via its {@code toString()}
     * value.
     *
     * @param o object to hash, may be {@code null}
     * @return 64-bit hash
     */
    public static long hash64(Object o) {
        if (o == null) {
            return 0L;
        }
        if (o instanceof String) {
            final byte[] encoded = ((String) o).getBytes();
            return hash64(encoded, encoded.length);
        }
        if (o instanceof byte[]) {
            final byte[] raw = (byte[]) o;
            return hash64(raw, raw.length);
        }
        // Fall back to the textual form; re-enters this method with a String (or null).
        return hash64((Object) o.toString());
    }

    // 64 bit implementation copied from here:  https://github.com/tnm/murmurhash-java

    /**
     * Generates 64 bit hash from byte array with default seed value.
     *
     * @param data   byte array to hash
     * @param length length of the array to hash
     * @return 64 bit hash of the given array
     */
    public static long hash64(final byte[] data, int length) {
        return hash64(data, length, 0xe17a1465);
    }

    /**
     * Generates 64 bit hash from byte array of the given length and seed.
     *
     * @param data   byte array to hash
     * @param length length of the array to hash
     * @param seed   initial seed value (treated as an unsigned 32-bit quantity)
     * @return 64 bit hash of the given array
     */
    public static long hash64(final byte[] data, int length, int seed) {
        final long m = 0xc6a4a7935bd1e995L;
        final int r = 47;

        long h = (seed & 0xffffffffL) ^ (length * m);

        // Body: consume the input eight bytes at a time, little-endian.
        final int fullWords = length / 8;
        for (int word = 0; word < fullWords; word++) {
            final int base = word * 8;
            long k = 0L;
            for (int b = 0; b < 8; b++) {
                k |= (data[base + b] & 0xffL) << (8 * b);
            }

            k *= m;
            k ^= k >>> r;
            k *= m;

            h ^= k;
            h *= m;
        }

        // Tail: the 1-7 bytes after the last full word, highest offset first.
        final int remaining = length % 8;
        if (remaining > 0) {
            final int tailStart = length & ~7;
            for (int b = remaining - 1; b >= 0; b--) {
                h ^= (data[tailStart + b] & 0xffL) << (8 * b);
            }
            h *= m;
        }

        // Final avalanche.
        h ^= h >>> r;
        h *= m;
        h ^= h >>> r;
        return h;
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/ 0000775 0000000 0000000 00000000000 13531322035 0024573 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/ConcurrentStreamSummary.java 0000664 0000000 0000000 00000011101 13531322035 0032304 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
/**
* Based on the Space-Saving algorithm and the Stream-Summary
* data structure as described in:
* Efficient Computation of Frequent and Top-k Elements in Data Streams
* by Metwally, Agrawal, and Abbadi
*
* Ideally used in multithreaded applications, otherwise see {@link StreamSummary}
*
* @param type of data in the stream to be summarized
* @author Eric Vlaanderen
*/
public class ConcurrentStreamSummary implements ITopK {
private final int capacity;
private final ConcurrentHashMap> itemMap;
private final AtomicReference> minVal;
private final AtomicLong size;
private final AtomicBoolean reachCapacity;
public ConcurrentStreamSummary(final int capacity) {
this.capacity = capacity;
this.minVal = new AtomicReference>();
this.size = new AtomicLong(0);
this.itemMap = new ConcurrentHashMap>(capacity);
this.reachCapacity = new AtomicBoolean(false);
}
@Override
public boolean offer(final T element) {
return offer(element, 1);
}
@Override
public boolean offer(final T element, final int incrementCount) {
long val = incrementCount;
ScoredItem value = new ScoredItem(element, incrementCount);
ScoredItem oldVal = itemMap.putIfAbsent(element, value);
if (oldVal != null) {
val = oldVal.addAndGetCount(incrementCount);
} else if (reachCapacity.get() || size.incrementAndGet() > capacity) {
reachCapacity.set(true);
ScoredItem oldMinVal = minVal.getAndSet(value);
itemMap.remove(oldMinVal.getItem());
while (oldMinVal.isNewItem()) {
// Wait for the oldMinVal so its error and value are completely up to date.
// no thread.sleep here due to the overhead of calling it - the waiting time will be microseconds.
}
long count = oldMinVal.getCount();
value.addAndGetCount(count);
value.setError(count);
}
value.setNewItem(false);
minVal.set(getMinValue());
return val != incrementCount;
}
private ScoredItem getMinValue() {
ScoredItem minVal = null;
for (ScoredItem entry : itemMap.values()) {
if (minVal == null || (!entry.isNewItem() && entry.getCount() < minVal.getCount())) {
minVal = entry;
}
}
return minVal;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("[");
for (ScoredItem entry : itemMap.values()) {
sb.append("(" + entry.getCount() + ": " + entry.getItem() + ", e: " + entry.getError() + "),");
}
sb.deleteCharAt(sb.length() - 1);
sb.append("]");
return sb.toString();
}
@Override
public List peek(final int k) {
List toReturn = new ArrayList(k);
List> values = peekWithScores(k);
for (ScoredItem value : values) {
toReturn.add(value.getItem());
}
return toReturn;
}
public List> peekWithScores(final int k) {
List> values = new ArrayList>();
for (Map.Entry> entry : itemMap.entrySet()) {
ScoredItem value = entry.getValue();
values.add(new ScoredItem(value.getItem(), value.getCount(), value.getError()));
}
Collections.sort(values);
values = values.size() > k ? values.subList(0, k) : values;
return values;
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/Counter.java 0000664 0000000 0000000 00000003666 13531322035 0027070 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import com.clearspring.analytics.util.ListNode2;
public class Counter implements Externalizable {
protected ListNode2.Bucket> bucketNode;
protected T item;
protected long count;
protected long error;
/**
* For de-serialization
*/
public Counter() {
}
public Counter(ListNode2.Bucket> bucket, T item) {
this.bucketNode = bucket;
this.count = 0;
this.error = 0;
this.item = item;
}
public T getItem() {
return item;
}
public long getCount() {
return count;
}
public long getError() {
return error;
}
@Override
public String toString() {
return item + ":" + count + ':' + error;
}
@SuppressWarnings("unchecked")
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
item = (T) in.readObject();
count = in.readLong();
error = in.readLong();
}
@Override
public void writeExternal(ObjectOutput out) throws IOException {
out.writeObject(item);
out.writeLong(count);
out.writeLong(error);
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/ISampleSet.java 0000664 0000000 0000000 00000001573 13531322035 0027452 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream;
import java.util.List;
/**
 * A set of sampled elements, each carrying a count.
 *
 * @param <T> type of the sampled elements
 */
public interface ISampleSet<T> {

    /**
     * Adds one occurrence of {@code element}.
     *
     * @param element the element to add
     * @return the count held for {@code element} after the call
     */
    long put(T element);

    /**
     * Adds {@code incrementCount} occurrences of {@code element}.
     *
     * @param element        the element to add
     * @param incrementCount number of occurrences to add
     * @return the count held for {@code element} after the call
     */
    long put(T element, int incrementCount);

    /**
     * Removes one occurrence of a randomly selected element.
     *
     * @return the element whose count was reduced, or {@code null} if none was selected
     */
    T removeRandom();

    /**
     * @return the element at the top of the set, or {@code null} if the set is empty
     */
    T peek();

    /**
     * @param k maximum number of elements to return
     * @return up to {@code k} elements, starting from the top of the set
     */
    List<T> peek(int k);

    /**
     * @return number of distinct elements held
     */
    int size();

    /**
     * @return total count across all elements held
     */
    long count();
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/ITopK.java 0000664 0000000 0000000 00000002611 13531322035 0026424 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream;
import java.util.List;
/**
 * Tracks the most frequently offered elements of a stream.
 *
 * @param <T> type of the elements offered
 */
public interface ITopK<T> {

    /**
     * offer a single element to the top.
     *
     * @param element - the element to add to the top
     * @return false if the element was already in the top
     */
    boolean offer(T element);

    /**
     * offer a single element to the top and increment the count
     * for that element by incrementCount.
     *
     * @param element        - the element to add to the top
     * @param incrementCount - the amount by which the element's count is increased
     * @return false if the element was already in the top
     */
    boolean offer(T element, int incrementCount);

    /**
     * @param k maximum number of elements to return
     * @return top k elements offered (may be an approximation)
     */
    List<T> peek(int k);
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/SampleSet.java 0000664 0000000 0000000 00000012702 13531322035 0027335 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
public class SampleSet implements ISampleSet {
private Map> sampleMap;
private int size;
private long count;
private Random random;
/**
* Element with the highest frequency in the set
*/
private Node head;
/**
* Element with the lowest frequency in the set
*/
private Node tail;
public SampleSet() {
this(7);
}
public SampleSet(int capacity) {
this(capacity, new Random());
}
public SampleSet(int capacity, Random random) {
sampleMap = new HashMap>(capacity);
this.random = random;
}
public T peek() {
return (head != null) ? head.element : null;
}
public List peek(int k) {
List topK = new ArrayList(k);
for (Node itr = head; itr != null && topK.size() < k; itr = itr.next) {
topK.add(itr.element);
}
return topK;
}
public long put(T element) {
return put(element, 1);
}
public long put(T element, int incrementCount) {
Node node = sampleMap.get(element);
if (node != null) {
node.count = node.count + incrementCount;
promote(node);
} else {
node = new Node();
node.element = element;
node.count = incrementCount;
node.prev = tail;
if (tail != null) {
tail.next = node;
}
tail = node;
if (head == null) {
head = node;
}
sampleMap.put(element, node);
size++;
}
count++;
return node.count;
}
public T removeRandom() {
double p = random.nextDouble();
long weight = 0;
for (Node itr = head; itr != null; itr = itr.next) {
weight += itr.count;
if (p < weight / (double) count) {
itr.count--;
count--;
demote(itr);
if (itr.count == 0) {
removeMin();
}
return itr.element;
}
}
return null;
}
protected T removeMin() {
if (tail == null) {
return null;
}
size--;
count -= tail.count;
T minElement = tail.element;
tail = tail.prev;
if (tail != null) {
tail.next = null;
}
sampleMap.remove(minElement);
return minElement;
}
public int size() {
return size;
}
public long count() {
return count;
}
protected T peekMin() {
return tail.element;
}
protected void promote(Node node) {
// Bring node closer to the head as necessary
while (node.prev != null && node.count > node.prev.count) {
// BEFORE head... [A]node.prev.prev --> [B]node.prev --> [C]node --> [D]node.next ...tail
// AFTER head... [A]node.prev.prev --> [C]node --> [B]node.prev --> [D]node.next ...tail
Node b = node.prev, c = node, d = node.next, a = (b == null) ? null : b.prev;
// Re-link each of 3 neighboring pairs
if (a != null) {
a.next = c;
}
c.prev = a;
c.next = b;
b.prev = c;
b.next = d;
if (d != null) {
d.prev = b;
}
// B and C may have switched head/tail roles
if (head == b) {
head = c;
}
if (tail == c) {
tail = b;
}
}
}
protected void demote(Node node) {
// Bring node closer to the tail as necessary
while (node.next != null && node.count < node.next.count) {
// BEFORE head... [A]node.prev --> [B]node --> [C]node.next --> [D]node.next.next ...tail
// AFTER head... [A]node.prev --> [C]node.next --> [B]node --> [D]node.next.next ...tail
Node a = node.prev, b = node, c = node.next, d = (c == null) ? null : c.next;
// Re-link each of 3 neighboring pairs
if (a != null) {
a.next = c;
}
c.prev = a;
c.next = b;
b.prev = c;
if (d != null) {
d.prev = b;
}
b.next = d;
// B and C may have switched head/tail roles
if (head == b) {
head = c;
}
if (tail == c) {
tail = b;
}
}
}
private class Node {
private Node next;
private Node prev;
private E element;
private long count;
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/ScoredItem.java 0000664 0000000 0000000 00000004556 13531322035 0027506 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
/**
 * An item together with an atomically updatable count and error, plus a flag
 * telling whether the item is still "new". The flag starts out {@code true}
 * and is changed through {@link #setNewItem(boolean)}.
 * <p>
 * The natural ordering sorts by <b>descending</b> count and ignores the item
 * and error, so it is not consistent with {@code equals}.
 *
 * @param <T> type of the scored item
 * @author Eric Vlaanderen
 */
public class ScoredItem<T> implements Comparable<ScoredItem<T>> {

    private final AtomicLong error;
    private final AtomicLong count;
    private final AtomicBoolean newItem;
    private final T item;

    /**
     * @param item  the scored item
     * @param count initial count
     * @param error initial error
     */
    public ScoredItem(final T item, final long count, final long error) {
        this.item = item;
        this.error = new AtomicLong(error);
        this.count = new AtomicLong(count);
        this.newItem = new AtomicBoolean(true);
    }

    /**
     * Creates a scored item with an error of zero.
     *
     * @param item  the scored item
     * @param count initial count
     */
    public ScoredItem(final T item, final long count) {
        this(item, count, 0L);
    }

    /**
     * Atomically adds {@code delta} to the count.
     *
     * @param delta amount to add
     * @return the updated count
     */
    public long addAndGetCount(final long delta) {
        return this.count.addAndGet(delta);
    }

    public void setError(final long newError) {
        this.error.set(newError);
    }

    public long getError() {
        return error.get();
    }

    public T getItem() {
        return item;
    }

    public boolean isNewItem() {
        return newItem.get();
    }

    public long getCount() {
        return count.get();
    }

    /**
     * Orders by descending count: an item with a larger count compares as
     * smaller, so it sorts first.
     */
    @Override
    public int compareTo(final ScoredItem<T> o) {
        long x = o.count.get();
        long y = count.get();
        return (x < y) ? -1 : ((x == y) ? 0 : 1);
    }

    /**
     * @return {@code Value: <item>, Count: <count>, Error: <error>, object: <Object.toString()>}
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("Value: ");
        sb.append(item);
        sb.append(", Count: ");
        sb.append(count);
        sb.append(", Error: ");
        sb.append(error);
        sb.append(", object: ");
        sb.append(super.toString());
        return sb.toString();
    }

    public void setNewItem(final boolean newItem) {
        this.newItem.set(newItem);
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/StochasticTopper.java 0000664 0000000 0000000 00000003772 13531322035 0030745 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*
*/
package com.clearspring.analytics.stream;
import java.util.List;
import java.util.Random;
/**
* Estimates most frequently occurring items in a data stream
* using a bounded amount of memory.
*
* Warning: this class is not thread safe.
*/
public class StochasticTopper implements ITopK {
private int sampleSize;
private ISampleSet sample;
private Random random;
private long count;
public StochasticTopper(int sampleSize) {
this(sampleSize, null);
}
public StochasticTopper(int sampleSize, Long seed) {
this.sample = new SampleSet(sampleSize);
this.sampleSize = sampleSize;
if (seed != null) {
random = new Random(seed);
} else {
random = new Random();
}
}
public boolean offer(T item, int incrementCount) {
count++;
boolean taken = false;
if (sample.count() < sampleSize) {
sample.put(item, incrementCount);
taken = true;
} else if (random.nextDouble() < sampleSize / (double) count) {
sample.removeRandom();
sample.put(item, incrementCount);
taken = true;
}
return taken;
}
public boolean offer(T item) {
return offer(item, 1);
}
/**
* Retrieve top k items
*/
public List peek(int k) {
return sample.peek(k);
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/StreamSummary.java 0000664 0000000 0000000 00000023513 13531322035 0030253 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream;
import java.io.ByteArrayInputStream;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import com.clearspring.analytics.util.DoublyLinkedList;
import com.clearspring.analytics.util.ExternalizableUtil;
import com.clearspring.analytics.util.ListNode2;
import com.clearspring.analytics.util.Pair;
/**
* Based on the Space-Saving algorithm and the Stream-Summary
* data structure as described in:
* Efficient Computation of Frequent and Top-k Elements in Data Streams
* by Metwally, Agrawal, and Abbadi
*
* @param type of data in the stream to be summarized
*/
public class StreamSummary implements ITopK, Externalizable {
protected class Bucket {
protected DoublyLinkedList> counterList;
private long count;
public Bucket(long count) {
this.count = count;
this.counterList = new DoublyLinkedList>();
}
}
protected int capacity;
private HashMap>> counterMap;
protected DoublyLinkedList bucketList;
/**
* @param capacity maximum size (larger capacities improve accuracy)
*/
public StreamSummary(int capacity) {
this.capacity = capacity;
counterMap = new HashMap>>();
bucketList = new DoublyLinkedList();
}
public int getCapacity() {
return capacity;
}
/**
* Algorithm: Space-Saving
*
* @param item stream element (e )
* @return false if item was already in the stream summary, true otherwise
*/
@Override
public boolean offer(T item) {
return offer(item, 1);
}
/**
* Algorithm: Space-Saving
*
* @param item stream element (e )
* @return false if item was already in the stream summary, true otherwise
*/
@Override
public boolean offer(T item, int incrementCount) {
return offerReturnAll(item, incrementCount).left;
}
/**
* @param item stream element (e )
* @return item dropped from summary if an item was dropped, null otherwise
*/
public T offerReturnDropped(T item, int incrementCount) {
return offerReturnAll(item, incrementCount).right;
}
/**
* @param item stream element (e )
* @return Pair where isNewItem is the return value of offer() and itemDropped is null if no item was dropped
*/
public Pair offerReturnAll(T item, int incrementCount) {
ListNode2> counterNode = counterMap.get(item);
boolean isNewItem = (counterNode == null);
T droppedItem = null;
if (isNewItem) {
if (size() < capacity) {
counterNode = bucketList.enqueue(new Bucket(0)).getValue().counterList.add(new Counter(bucketList.tail(), item));
} else {
Bucket min = bucketList.first();
counterNode = min.counterList.tail();
Counter counter = counterNode.getValue();
droppedItem = counter.item;
counterMap.remove(droppedItem);
counter.item = item;
counter.error = min.count;
}
counterMap.put(item, counterNode);
}
incrementCounter(counterNode, incrementCount);
return new Pair(isNewItem, droppedItem);
}
protected void incrementCounter(ListNode2> counterNode, int incrementCount) {
Counter counter = counterNode.getValue(); // count_i
ListNode2 oldNode = counter.bucketNode;
Bucket bucket = oldNode.getValue(); // Let Bucket_i be the bucket of count_i
bucket.counterList.remove(counterNode); // Detach count_i from Bucket_i's child-list
counter.count = counter.count + incrementCount;
// Finding the right bucket for count_i
// Because we allow a single call to increment count more than once, this may not be the adjacent bucket.
ListNode2 bucketNodePrev = oldNode;
ListNode2 bucketNodeNext = bucketNodePrev.getNext();
while (bucketNodeNext != null) {
Bucket bucketNext = bucketNodeNext.getValue(); // Let Bucket_i^+ be Bucket_i's neighbor of larger value
if (counter.count == bucketNext.count) {
bucketNext.counterList.add(counterNode); // Attach count_i to Bucket_i^+'s child-list
break;
} else if (counter.count > bucketNext.count) {
bucketNodePrev = bucketNodeNext;
bucketNodeNext = bucketNodePrev.getNext(); // Continue hunting for an appropriate bucket
} else {
// A new bucket has to be created
bucketNodeNext = null;
}
}
if (bucketNodeNext == null) {
Bucket bucketNext = new Bucket(counter.count);
bucketNext.counterList.add(counterNode);
bucketNodeNext = bucketList.addAfter(bucketNodePrev, bucketNext);
}
counter.bucketNode = bucketNodeNext;
//Cleaning up
if (bucket.counterList.isEmpty()) // If Bucket_i's child-list is empty
{
bucketList.remove(oldNode); // Detach Bucket_i from the Stream-Summary
}
}
@Override
public List peek(int k) {
List topK = new ArrayList(k);
for (ListNode2 bNode = bucketList.head(); bNode != null; bNode = bNode.getPrev()) {
Bucket b = bNode.getValue();
for (Counter c : b.counterList) {
if (topK.size() == k) {
return topK;
}
topK.add(c.item);
}
}
return topK;
}
public List> topK(int k) {
List> topK = new ArrayList>(k);
for (ListNode2 bNode = bucketList.head(); bNode != null; bNode = bNode.getPrev()) {
Bucket b = bNode.getValue();
for (Counter c : b.counterList) {
if (topK.size() == k) {
return topK;
}
topK.add(c);
}
}
return topK;
}
/**
* @return number of items stored
*/
public int size() {
return counterMap.size();
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append('[');
for (ListNode2 bNode = bucketList.head(); bNode != null; bNode = bNode.getPrev()) {
Bucket b = bNode.getValue();
sb.append('{');
sb.append(b.count);
sb.append(":[");
for (Counter c : b.counterList) {
sb.append('{');
sb.append(c.item);
sb.append(':');
sb.append(c.error);
sb.append("},");
}
if (b.counterList.size() > 0) {
sb.deleteCharAt(sb.length() - 1);
}
sb.append("]},");
}
if (bucketList.size() > 0) {
sb.deleteCharAt(sb.length() - 1);
}
sb.append(']');
return sb.toString();
}
@SuppressWarnings("unchecked")
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
this.bucketList = new DoublyLinkedList();
this.capacity = in.readInt();
int size = in.readInt();
this.counterMap = new HashMap>>(size);
Bucket currentBucket = null;
ListNode2 currentBucketNode = null;
for (int i = 0; i < size; i++) {
Counter c = (Counter) in.readObject();
if (currentBucket == null || c.count != currentBucket.count) {
currentBucket = new Bucket(c.count);
currentBucketNode = bucketList.add(currentBucket);
}
c.bucketNode = currentBucketNode;
counterMap.put(c.item, currentBucket.counterList.add(c));
}
}
@Override
public void writeExternal(ObjectOutput out) throws IOException {
out.writeInt(this.capacity);
out.writeInt(this.size());
for (ListNode2 bNode = bucketList.tail(); bNode != null; bNode = bNode.getNext()) {
Bucket b = bNode.getValue();
for (Counter c : b.counterList) {
out.writeObject(c);
}
}
}
/**
* For de-serialization
*/
public StreamSummary() {
}
/**
 * Creates a summary from its serialized form by delegating to {@code fromBytes}.
 *
 * @param bytes serialized summary to restore
 * @throws IOException if the bytes cannot be read as an object stream
 * @throws ClassNotFoundException if a serialized class cannot be resolved
 */
public StreamSummary(byte[] bytes) throws IOException, ClassNotFoundException {
    fromBytes(bytes);
}
/**
 * Replaces this summary's state with the state encoded in {@code bytes}.
 *
 * NOTE(review): this goes through Java object deserialization
 * ({@code ObjectInputStream}); confirm callers only pass trusted bytes.
 *
 * @param bytes serialized summary to restore
 * @throws IOException if the bytes cannot be read as an object stream
 * @throws ClassNotFoundException if a serialized class cannot be resolved
 */
public void fromBytes(byte[] bytes) throws IOException, ClassNotFoundException {
    ByteArrayInputStream rawBytes = new ByteArrayInputStream(bytes);
    ObjectInputStream objectStream = new ObjectInputStream(rawBytes);
    readExternal(objectStream);
}
/**
 * Serializes this summary through {@code ExternalizableUtil.toBytes}.
 *
 * @return the serialized form of this summary
 * @throws IOException if serialization fails
 */
public byte[] toBytes() throws IOException {
    return ExternalizableUtil.toBytes(this);
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/cardinality/ 0000775 0000000 0000000 00000000000 13531322035 0027076 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/cardinality/AdaptiveCounting.java 0000664 0000000 0000000 00000012447 13531322035 0033215 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.cardinality;
import java.io.Serializable;
import java.util.Arrays;
import com.clearspring.analytics.hash.Lookup3Hash;
import com.clearspring.analytics.util.IBuilder;
/**
*
* Based on the adaptive counting approach of:
* Fast and Accurate Traffic Matrix Measurement Using Adaptive Cardinality Counting
* by: Cai, Pan, Kwok, and Hwang
*
*
* TODO: use 5 bits/bucket instead of 8 (37.5% size reduction)
* TODO: super-LogLog optimizations
*/
public class AdaptiveCounting extends LogLog {

    /**
     * Number of empty buckets
     */
    protected int b_e;

    /**
     * Switching empty bucket ratio: at or above this fraction of empty buckets
     * {@link #cardinality()} uses the linear-counting formula instead of LogLog.
     */
    protected final double B_s = 0.051;

    /**
     * Creates an empty estimator.
     *
     * @param k size parameter handed to the {@code LogLog} constructor; the top
     *          {@code k} bits of each hash select the bucket
     */
    public AdaptiveCounting(int k) {
        super(k);
        // Nothing has been offered yet, so every one of the m buckets is empty.
        b_e = m;
    }

    /**
     * Creates an estimator from existing bucket values, counting how many of
     * them are still zero.
     *
     * @param M bucket values, one byte per bucket
     */
    public AdaptiveCounting(byte[] M) {
        super(M);
        for (byte b : M) {
            if (b == 0) {
                b_e++;
            }
        }
    }

    /**
     * Hashes {@code o.toString()} and raises the selected bucket if the new
     * rank is larger, keeping {@code Rsum} and the empty-bucket count in step.
     *
     * @param o element to add
     * @return true if a bucket value changed
     */
    @Override
    public boolean offer(Object o) {
        long x = Lookup3Hash.lookup3ycs64(o.toString());
        int j = (int) (x >>> (Long.SIZE - k));
        byte r = rho(x, k);
        if (M[j] >= r) {
            return false;
        }
        Rsum += r - M[j];
        if (M[j] == 0) {
            // The bucket is about to become non-empty.
            b_e--;
        }
        M[j] = r;
        return true;
    }

    /**
     * Estimates the cardinality. While the fraction of empty buckets is at
     * least {@link #B_s} the linear-counting estimate {@code -m * ln(B)} is
     * used; otherwise the {@code LogLog} estimate is returned.
     */
    @Override
    public long cardinality() {
        double B = (b_e / (double) m);
        if (B >= B_s) {
            return Math.round(-m * Math.log(B));
        }
        return super.cardinality();
    }

    /**
     * Computes the 1-based position of the first set bit among the last
     * Long.SIZE-k bits of {@code x}.
     *
     * A guard bit is OR-ed in just below the shifted value, so the result never
     * exceeds Long.SIZE-k+1, which is what is returned when those bits are all zero.
     *
     * @param x hash value
     * @param k number of leading bits used for bucket selection
     * @return the rank as described above
     */
    protected static byte rho(long x, int k) {
        // Long shift for the guard bit so it cannot wrap the way an int shift does.
        return (byte) (Long.numberOfLeadingZeros((x << k) | (1L << (k - 1))) + 1);
    }

    /**
     * Merges this estimator with the given ones via {@code LogLog.merge} and
     * wraps the merged bucket values in a new AdaptiveCounting.
     *
     * @param estimators estimators to merge with this one
     * @return a new AdaptiveCounting built from the merged buckets
     * @throws LogLogMergeException if estimators are not mergeable (all estimators must be instances of LogLog of the same size)
     */
    @Override
    public ICardinality merge(ICardinality... estimators) throws LogLogMergeException {
        LogLog res = (LogLog) super.merge(estimators);
        return new AdaptiveCounting(res.M);
    }

    /**
     * Merges estimators to produce an estimator for their combined streams
     *
     * @param estimators estimators to merge; the first one drives the merge
     * @return merged estimator or null if no estimators were provided
     * @throws LogLogMergeException if estimators are not mergeable (all estimators must be the same size)
     */
    public static AdaptiveCounting mergeEstimators(LogLog... estimators) throws LogLogMergeException {
        if (estimators == null || estimators.length == 0) {
            return null;
        }
        ICardinality merged = estimators[0].merge(Arrays.copyOfRange(estimators, 1, estimators.length));
        if (merged instanceof AdaptiveCounting) {
            return (AdaptiveCounting) merged;
        }
        // The first estimator was not an AdaptiveCounting, so its merge() did not
        // produce one; wrap the merged buckets instead of failing on a cast.
        return new AdaptiveCounting(((LogLog) merged).M);
    }

    /**
     * Serializable factory for {@link AdaptiveCounting} instances of a fixed {@code k}.
     */
    public static class Builder implements IBuilder<ICardinality>, Serializable {

        private static final long serialVersionUID = 2205437102378081334L;

        protected final int k;

        /** Uses {@code k = 16}. */
        public Builder() {
            this(16);
        }

        /**
         * @param k size parameter passed to {@link AdaptiveCounting#AdaptiveCounting(int)}
         */
        public Builder(int k) {
            this.k = k;
        }

        @Override
        public AdaptiveCounting build() {
            return new AdaptiveCounting(k);
        }

        /**
         * @return {@code 2^k}, i.e. one byte per bucket
         */
        @Override
        public int sizeof() {
            return 1 << k;
        }

        /**
         * For cardinalities less than 4.25M, obyCount provides a LinearCounting Builder
         * (see LinearCounting.Builder.onePercentError() ) using only the
         * space required to provide estimates within 1% of the actual cardinality,
         * up to ~65k.
         *
         * For cardinalities greater than 4.25M, an AdaptiveCounting builder is returned
         * that allocates ~65KB and provides estimates with a Gaussian error distribution
         * with an average error of 0.5% and a standard deviation of 0.5%
         *
         * @param maxCardinality largest cardinality the estimator is expected to see
         * @return a builder chosen as described above
         * @throws IllegalArgumentException if maxCardinality is not a positive integer
         * @see LinearCounting.Builder#onePercentError(int)
         */
        public static IBuilder<ICardinality> obyCount(long maxCardinality) {
            if (maxCardinality <= 0) {
                throw new IllegalArgumentException("maxCardinality (" + maxCardinality + ") must be a positive integer");
            }
            if (maxCardinality < 4250000) {
                // Safe narrowing: the value is below 4.25M here.
                return LinearCounting.Builder.onePercentError((int) maxCardinality);
            }
            return new Builder(16);
        }
    }
}
CardinalityMergeException.java 0000664 0000000 0000000 00000001527 13531322035 0034771 0 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/cardinality /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.cardinality;
/**
 * Base type for the checked exceptions thrown when cardinality estimators
 * cannot be merged. Concrete estimators provide their own subclasses.
 */
@SuppressWarnings("serial")
public abstract class CardinalityMergeException extends Exception {

    /**
     * @param message description of why the merge failed
     */
    public CardinalityMergeException(final String message) {
        super(message);
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/cardinality/CountThenEstimate.java 0000664 0000000 0000000 00000022772 13531322035 0033356 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.cardinality;
import java.io.ByteArrayInputStream;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.clearspring.analytics.util.ExternalizableUtil;
import com.clearspring.analytics.util.IBuilder;
/**
* Exact -> Estimator cardinality counting
*
*
* Avoids allocating a large block of memory for cardinality estimation until
* a specified "tipping point" cardinality is reached.
*
*/
public class CountThenEstimate implements ICardinality, Externalizable {
protected final static byte LC = 1;
protected final static byte AC = 2;
protected final static byte HLC = 3;
protected final static byte LLC = 4;
protected final static byte HLPC = 5;
/**
* Cardinality after which exact counting gives way to estimation
*/
protected int tippingPoint;
/**
* True after switching to estimation
*/
protected boolean tipped = false;
/**
* Factory for instantiating estimator after the tipping point is reached
*/
protected IBuilder builder;
/**
* Cardinality estimator
* Null until tipping point is reached
*/
protected ICardinality estimator;
/**
* Cardinality counter
* Null after tipping point is reached
*/
protected Set counter;
/**
* Default constructor
* Exact counts up to 1000, estimation done with default Builder
*/
public CountThenEstimate() {
this(1000, AdaptiveCounting.Builder.obyCount(1000000000));
}
/**
* @param tippingPoint Cardinality at which exact counting gives way to estimation
* @param builder Factory for instantiating estimator after the tipping point is reached
*/
public CountThenEstimate(int tippingPoint, IBuilder builder) {
this.tippingPoint = tippingPoint;
this.builder = builder;
this.counter = new HashSet();
}
/**
* Deserialization constructor
*
* @param bytes
* @throws IOException
* @throws ClassNotFoundException
*/
public CountThenEstimate(byte[] bytes) throws IOException, ClassNotFoundException {
readExternal(new ObjectInputStream(new ByteArrayInputStream(bytes)));
if (!tipped && builder.sizeof() <= bytes.length) {
tip();
}
}
@Override
public long cardinality() {
if (tipped) {
return estimator.cardinality();
}
return counter.size();
}
@Override
public boolean offerHashed(long hashedLong) {
throw new UnsupportedOperationException();
}
@Override
public boolean offerHashed(int hashedInt) {
throw new UnsupportedOperationException();
}
@Override
public boolean offer(Object o) {
boolean modified = false;
if (tipped) {
modified = estimator.offer(o);
} else {
if (counter.add(o)) {
modified = true;
if (counter.size() > tippingPoint) {
tip();
}
}
}
return modified;
}
@Override
public int sizeof() {
if (tipped) {
return estimator.sizeof();
}
return -1;
}
/**
* Switch from exact counting to estimation
*/
private void tip() {
estimator = builder.build();
for (Object o : counter) {
estimator.offer(o);
}
counter = null;
builder = null;
tipped = true;
}
public boolean tipped() {
return tipped;
}
@Override
public byte[] getBytes() throws IOException {
return ExternalizableUtil.toBytes(this);
}
@SuppressWarnings("unchecked")
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
tipped = in.readBoolean();
if (tipped) {
byte type = in.readByte();
byte[] bytes = new byte[in.readInt()];
in.readFully(bytes);
switch (type) {
case LC:
estimator = new LinearCounting(bytes);
break;
case AC:
estimator = new AdaptiveCounting(bytes);
break;
case HLC:
estimator = HyperLogLog.Builder.build(bytes);
break;
case HLPC:
estimator = HyperLogLogPlus.Builder.build(bytes);
break;
case LLC:
estimator = new LogLog(bytes);
break;
default:
throw new IOException("Unrecognized estimator type: " + type);
}
} else {
tippingPoint = in.readInt();
builder = (IBuilder) in.readObject();
int count = in.readInt();
assert (count <= tippingPoint) : String.format("Invalid serialization: count (%d) > tippingPoint (%d)", count, tippingPoint);
counter = new HashSet(count);
for (int i = 0; i < count; i++) {
counter.add(in.readObject());
}
}
}
@Override
public void writeExternal(ObjectOutput out) throws IOException {
out.writeBoolean(tipped);
if (tipped) {
if (estimator instanceof LinearCounting) {
out.writeByte(LC);
} else if (estimator instanceof AdaptiveCounting) {
out.writeByte(AC);
} else if (estimator instanceof HyperLogLog) {
out.writeByte(HLC);
} else if (estimator instanceof HyperLogLogPlus) {
out.writeByte(HLPC);
} else if (estimator instanceof LogLog) {
out.writeByte(LLC);
} else {
throw new IOException("Estimator unsupported for serialization: " + estimator.getClass().getName());
}
byte[] bytes = estimator.getBytes();
out.writeInt(bytes.length);
out.write(bytes);
} else {
out.writeInt(tippingPoint);
out.writeObject(builder);
out.writeInt(counter.size());
for (Object o : counter) {
out.writeObject(o);
}
}
}
@Override
public ICardinality merge(ICardinality... estimators) throws CardinalityMergeException {
if (estimators == null) {
return mergeEstimators(this);
}
CountThenEstimate[] all = Arrays.copyOf(estimators, estimators.length + 1, CountThenEstimate[].class);
all[all.length - 1] = this;
return mergeEstimators(all);
}
/**
* Merges estimators to produce an estimator for their combined streams
*
* @param estimators
* @return merged estimator or null if no estimators were provided
* @throws CountThenEstimateMergeException if estimators are not mergeable (all must be CountThenEstimate made with the same builder)
*/
public static CountThenEstimate mergeEstimators(CountThenEstimate... estimators) throws CardinalityMergeException {
CountThenEstimate merged = null;
int numEstimators = (estimators == null) ? 0 : estimators.length;
if (numEstimators > 0) {
List tipped = new ArrayList(numEstimators);
List untipped = new ArrayList(numEstimators);
for (CountThenEstimate estimator : estimators) {
if (estimator.tipped) {
tipped.add(estimator.estimator);
} else {
untipped.add(estimator);
}
}
if (untipped.size() > 0) {
merged = new CountThenEstimate(untipped.get(0).tippingPoint, untipped.get(0).builder);
for (CountThenEstimate cte : untipped) {
for (Object o : cte.counter) {
merged.offer(o);
}
}
} else {
merged = new CountThenEstimate(0, new LinearCounting.Builder(1));
merged.tip();
merged.estimator = tipped.remove(0);
}
if (!tipped.isEmpty()) {
if (!merged.tipped) {
merged.tip();
}
merged.estimator = merged.estimator.merge(tipped.toArray(new ICardinality[tipped.size()]));
}
}
return merged;
}
@SuppressWarnings("serial")
protected static class CountThenEstimateMergeException extends CardinalityMergeException {
public CountThenEstimateMergeException(String message) {
super(message);
}
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/cardinality/HyperLogLog.java 0000664 0000000 0000000 00000032543 13531322035 0032143 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2012 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.cardinality;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.io.Serializable;
import com.clearspring.analytics.hash.MurmurHash;
import com.clearspring.analytics.util.Bits;
import com.clearspring.analytics.util.IBuilder;
/**
* Java implementation of HyperLogLog (HLL) algorithm from this paper:
*
* http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
*
* HLL is an improved version of LogLog that is capable of estimating
* the cardinality of a set with accuracy = 1.04/sqrt(m) where
* m = 2^b. So we can control accuracy vs space usage by increasing
* or decreasing b.
*
* The main benefit of using HLL over LL is that it only requires 64%
* of the space that LL does to get the same accuracy.
*
* This implementation implements a single counter. If a large (millions)
* number of counters are required you may want to refer to:
*
* http://dsiutils.di.unimi.it/
*
* It has a more complex implementation of HLL that supports multiple counters
* in a single object, drastically reducing the java overhead from creating
* a large number of objects.
*
* This implementation leveraged a javascript implementation that Yammer has
* been working on:
*
* https://github.com/yammer/probablyjs
*
* Note that this implementation does not include the long range correction function
* defined in the original paper. Empirical evidence shows that the correction
* function causes more harm than good.
*
*
*
* Users have different motivations to use different types of hashing functions.
* Rather than try to keep up with all available hash functions and to remove
* the concern of causing future binary incompatibilities this class allows clients
* to offer the value in hashed int or long form. This way clients are free
* to change their hash function on their own time line. We recommend using Google's
* Guava Murmur3_128 implementation as it provides good performance and speed when
* high precision is required. In our tests the 32bit MurmurHash function included
* in this project is faster and produces better results than the 32 bit murmur3
* implementation google provides.
*
*/
public class HyperLogLog implements ICardinality, Serializable {

    private final RegisterSet registerSet;
    private final int log2m;
    private final double alphaMM;

    /**
     * Create a new HyperLogLog instance using the specified standard deviation.
     *
     * @param rsd - the relative standard deviation for the counter.
     *            smaller values create counters that require more space.
     */
    public HyperLogLog(double rsd) {
        this(log2m(rsd));
    }

    /** Converts a relative standard deviation to log2m: log2((1.106 / rsd)^2), truncated by the int cast. */
    private static int log2m(double rsd) {
        return (int) (Math.log((1.106 / rsd) * (1.106 / rsd)) / Math.log(2));
    }

    /** Converts log2m to a relative standard deviation: 1.106 / sqrt(2^log2m). */
    private static double rsd(int log2m) {
        return 1.106 / Math.sqrt(Math.exp(log2m * Math.log(2)));
    }

    /** Logarithm of {@code exponent} in the given {@code base}. */
    private static double logBase(double exponent, double base) {
        return Math.log(exponent) / Math.log(base);
    }

    /**
     * Inverts {@code accuracy = 1 - 1.04/sqrt(2^log2m)}. The logarithm is rounded
     * before being doubled, so the result is always even.
     */
    private static int accuracyToLog2m(double accuracy) {
        return Math.toIntExact(2 * Math.round(logBase(1.04 / (1 - accuracy), 2)));
    }

    /**
     * @throws IllegalArgumentException if log2m is outside [0, 30]
     */
    private static void validateLog2m(int log2m) {
        if (log2m < 0 || log2m > 30) {
            throw new IllegalArgumentException("log2m argument is "
                    + log2m + " and is outside the range [0, 30]");
        }
    }

    /**
     * Create a new HyperLogLog instance.  The log2m parameter defines the accuracy of
     * the counter.  The larger the log2m the better the accuracy.
     *
     * accuracy = 1 - 1.04/sqrt(2^log2m)
     *
     * @param log2m - the number of bits to use as the basis for the HLL instance
     */
    public HyperLogLog(int log2m) {
        this(log2m, new RegisterSet(1 << log2m));
    }

    /**
     * Creates a new HyperLogLog instance using the given registers.  Used for unmarshalling a serialized
     * instance and for merging multiple counters together.
     *
     * @param log2m       - the number of bits to use as the basis for the HLL instance
     * @param registerSet - the initial values for the register set
     * @throws IllegalArgumentException if log2m is outside [0, 30]
     */
    @Deprecated
    public HyperLogLog(int log2m, RegisterSet registerSet) {
        validateLog2m(log2m);
        this.registerSet = registerSet;
        this.log2m = log2m;
        int m = 1 << this.log2m;
        alphaMM = getAlphaMM(log2m, m);
    }

    @Override
    public boolean offerHashed(long hashedValue) {
        // j becomes the binary address determined by the first log2m bits of the hash
        // j will be between 0 and 2^log2m
        final int j = (int) (hashedValue >>> (Long.SIZE - log2m));
        // '+' binds tighter than '|': the guard value ((1 << (log2m - 1)) + 1) is
        // formed first and then OR-ed in, which bounds the leading-zero count.
        final long guard = (1 << (this.log2m - 1)) + 1;
        final int r = Long.numberOfLeadingZeros((hashedValue << this.log2m) | guard) + 1;
        return registerSet.updateIfGreater(j, r);
    }

    @Override
    public boolean offerHashed(int hashedValue) {
        // j becomes the binary address determined by the first log2m bits of the hash
        // j will be between 0 and 2^log2m
        final int j = hashedValue >>> (Integer.SIZE - log2m);
        // Same guard construction as the long overload (see above for precedence).
        final int guard = (1 << (this.log2m - 1)) + 1;
        final int r = Integer.numberOfLeadingZeros((hashedValue << this.log2m) | guard) + 1;
        return registerSet.updateIfGreater(j, r);
    }

    /**
     * Hashes {@code o} with {@code MurmurHash.hash} and offers the 32-bit result.
     */
    @Override
    public boolean offer(Object o) {
        final int x = MurmurHash.hash(o);
        return offerHashed(x);
    }

    /**
     * Estimates the cardinality from the registers.
     *
     * The raw estimate is {@code alphaMM / sum(2^-register)}. When it is at most
     * 2.5 * m and at least one register is still zero, the linear-counting
     * estimate is returned instead; with no zero register linear counting is
     * undefined (it would divide by zero), so the raw estimate is kept.
     */
    @Override
    public long cardinality() {
        double registerSum = 0;
        int count = registerSet.count;
        double zeros = 0.0;
        for (int j = 0; j < count; j++) {
            int val = registerSet.get(j);
            // Long shift: an int shift turns negative once a register reaches 31.
            registerSum += 1.0 / (1L << val);
            if (val == 0) {
                zeros++;
            }
        }

        double estimate = alphaMM * (1 / registerSum);

        if (estimate <= (5.0 / 2.0) * count && zeros > 0) {
            // Small Range Estimate
            return Math.round(linearCounting(count, zeros));
        }
        return Math.round(estimate);
    }

    /**
     * @return {@code registerSet.size * 4} (presumably the register storage in bytes)
     */
    @Override
    public int sizeof() {
        return registerSet.size * 4;
    }

    @Override
    public byte[] getBytes() throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutput dos = new DataOutputStream(baos);
        writeBytes(dos);
        baos.close();

        return baos.toByteArray();
    }

    /** Writes log2m, the register byte length, then every register word as an int. */
    private void writeBytes(DataOutput serializedByteStream) throws IOException {
        serializedByteStream.writeInt(log2m);
        serializedByteStream.writeInt(registerSet.size * 4);
        for (int x : registerSet.readOnlyBits()) {
            serializedByteStream.writeInt(x);
        }
    }

    /**
     * Add all the elements of the other set to this set.
     *
     * This operation does not imply a loss of precision.
     *
     * @param other A compatible Hyperloglog instance (same log2m)
     * @throws CardinalityMergeException if other is not compatible
     */
    public void addAll(HyperLogLog other) throws CardinalityMergeException {
        if (this.sizeof() != other.sizeof()) {
            throw new HyperLogLogMergeException("Cannot merge estimators of different sizes");
        }

        registerSet.merge(other.registerSet);
    }

    /**
     * Returns a new instance holding the merge of this estimator and the given ones;
     * neither this instance nor the arguments are modified by this method itself.
     *
     * @param estimators estimators to merge in; null means "only this"
     * @throws CardinalityMergeException if an estimator is not a HyperLogLog or has a different size
     */
    @Override
    public ICardinality merge(ICardinality... estimators) throws CardinalityMergeException {
        HyperLogLog merged = new HyperLogLog(log2m, new RegisterSet(this.registerSet.count));
        merged.addAll(this);

        if (estimators == null) {
            return merged;
        }

        for (ICardinality estimator : estimators) {
            if (!(estimator instanceof HyperLogLog)) {
                throw new HyperLogLogMergeException("Cannot merge estimators of different class");
            }
            HyperLogLog hll = (HyperLogLog) estimator;
            merged.addAll(hll);
        }

        return merged;
    }

    /** Serialization hook: substitutes a {@link SerializationHolder} for this object. */
    private Object writeReplace() {
        return new SerializationHolder(this);
    }

    /**
     * This class exists to support Externalizable semantics for
     * HyperLogLog objects without having to expose a public
     * constructor, public write/read methods, or pretend final
     * fields aren't final.
     *
     * In short, Externalizable allows you to skip some of the more
     * verbose meta-data default Serializable gets you, but still
     * includes the class name. In that sense, there is some cost
     * to this holder object because it has a longer class name. I
     * imagine people who care about optimizing for that have their
     * own work-around for long class names in general, or just use
     * a custom serialization framework. Therefore we make no attempt
     * to optimize that here (eg. by raising this from an inner class
     * and giving it an unhelpful name).
     */
    private static class SerializationHolder implements Externalizable {

        HyperLogLog hyperLogLogHolder;

        public SerializationHolder(HyperLogLog hyperLogLogHolder) {
            this.hyperLogLogHolder = hyperLogLogHolder;
        }

        /**
         * required for Externalizable
         */
        public SerializationHolder() {

        }

        @Override
        public void writeExternal(ObjectOutput out) throws IOException {
            hyperLogLogHolder.writeBytes(out);
        }

        @Override
        public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
            hyperLogLogHolder = Builder.build(in);
        }

        /** Hands back the rebuilt HyperLogLog in place of this holder. */
        private Object readResolve() {
            return hyperLogLogHolder;
        }
    }

    /**
     * Serializable factory for {@link HyperLogLog} instances; also hosts the
     * static helpers that rebuild an instance from its byte form.
     */
    public static class Builder implements IBuilder<ICardinality>, Serializable {

        private static final long serialVersionUID = -2567898469253021883L;

        private final double rsd;
        // Not serialized; recomputed from rsd in readObject.
        private transient int log2m;

        /**
         * Uses the given RSD percentage to determine how many bytes the constructed HyperLogLog will use.
         *
         * @deprecated Use {@link #withRsd(double)} instead. This builder's constructors did not match the (already
         * themselves ambiguous) constructors of the HyperLogLog class, but there is no way to make them match without
         * risking behavior changes downstream.
         */
        @Deprecated
        public Builder(double rsd) {
            this.log2m = log2m(rsd);
            validateLog2m(log2m);
            this.rsd = rsd;
        }

        /** This constructor is private to prevent behavior change for ambiguous usages. (Legacy support). */
        private Builder(int log2m) {
            this.log2m = log2m;
            validateLog2m(log2m);
            this.rsd = rsd(log2m);
        }

        private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
            in.defaultReadObject();
            // NOTE(review): for builders created via withLog2m this relies on the
            // rsd -> log2m floating-point round trip; confirm it reproduces log2m.
            this.log2m = log2m(rsd);
        }

        @Override
        public HyperLogLog build() {
            return new HyperLogLog(log2m);
        }

        @Override
        public int sizeof() {
            int k = 1 << log2m;
            return RegisterSet.getBits(k) * 4;
        }

        public static Builder withLog2m(int log2m) {
            return new Builder(log2m);
        }

        public static Builder withRsd(double rsd) {
            return new Builder(rsd);
        }

        public static Builder withAccuracy(double accuracy) {
            return new Builder(accuracyToLog2m(accuracy));
        }

        /**
         * Rebuilds an instance from the byte form produced by {@link HyperLogLog#getBytes()}.
         */
        public static HyperLogLog build(byte[] bytes) throws IOException {
            ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
            return build(new DataInputStream(bais));
        }

        /**
         * Reads log2m and the register byte length, then the register words.
         */
        public static HyperLogLog build(DataInput serializedByteStream) throws IOException {
            int log2m = serializedByteStream.readInt();
            int byteArraySize = serializedByteStream.readInt();
            return new HyperLogLog(log2m,
                    new RegisterSet(1 << log2m, Bits.getBits(serializedByteStream, byteArraySize)));
        }
    }

    /** Thrown when HyperLogLog instances cannot be merged. */
    @SuppressWarnings("serial")
    protected static class HyperLogLogMergeException extends CardinalityMergeException {

        public HyperLogLogMergeException(String message) {
            super(message);
        }
    }

    /**
     * @param p log2 of the register count
     * @param m register count
     * @return the bias-correction constant for {@code p} multiplied by {@code m * m}
     */
    protected static double getAlphaMM(final int p, final int m) {
        // See the paper.
        switch (p) {
            case 4:
                return 0.673 * m * m;
            case 5:
                return 0.697 * m * m;
            case 6:
                return 0.709 * m * m;
            default:
                return (0.7213 / (1 + 1.079 / m)) * m * m;
        }
    }

    /**
     * Linear-counting estimate {@code m * ln(m / V)}.
     *
     * @param m number of registers
     * @param V number of registers that are zero; the result is infinite when V is 0
     */
    protected static double linearCounting(int m, double V) {
        return m * Math.log(m / V);
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/cardinality/HyperLogLogPlus.java 0000664 0000000 0000000 00000327300 13531322035 0033005 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.cardinality;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
import com.clearspring.analytics.hash.MurmurHash;
import com.clearspring.analytics.util.Bits;
import com.clearspring.analytics.util.IBuilder;
import com.clearspring.analytics.util.Varint;
/**
* Implementation of HyperLogLog++ described in
*
*
* http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en/us/pubs/archive/40671.pdf
*
*
* Brief HyperLogLog++ Overview
*
* Uses 64 bit hashing instead of 32
* Has two representation modes: sparse and normal
*
* 'normal' is approximately the same as regular hyperloglog (still uses 64 bits)
*
* 'sparse' handles lower cardinality values with a highly accurate but poorly scaling
* strategy and leverages data compression to compete with 'normal' for as long as possible
* (sparse has the advantage on accuracy per unit of memory at low cardinality but quickly falls behind).
*/
public class HyperLogLogPlus implements ICardinality, Serializable {
public static final int[] EMPTY_SPARSE = new int[0];
private static final int INITIAL_TEMP_SET_CAPACITY = 4;
/**
* Ratio of the sparse set size to the temp set size.
*/
private static final int SPARSE_SET_TEMP_SET_RATIO = 4;
enum Format {
SPARSE, NORMAL
}
/** Used to mark codec version for serialization. */
private static final int VERSION = 2;
// threshold and bias data taken from google's bias correction data set: https://docs.google.com/document/d/1gyjfMHy43U9OWBXxfaeG-3MjGzejW1dlpyMwEYAAWEI/view?fullscreen#
static final double[] thresholdData = {10, 20, 40, 80, 220, 400, 900, 1800, 3100, 6500, 11500, 20000, 50000, 120000, 350000};
static final double[][] rawEstimateData = {
// precision 4
{11, 11.717, 12.207, 12.7896, 13.2882, 13.8204, 14.3772, 14.9342, 15.5202, 16.161, 16.7722, 17.4636, 18.0396, 18.6766, 19.3566, 20.0454, 20.7936, 21.4856, 22.2666, 22.9946, 23.766, 24.4692, 25.3638, 26.0764, 26.7864, 27.7602, 28.4814, 29.433, 30.2926, 31.0664, 31.9996, 32.7956, 33.5366, 34.5894, 35.5738, 36.2698, 37.3682, 38.0544, 39.2342, 40.0108, 40.7966, 41.9298, 42.8704, 43.6358, 44.5194, 45.773, 46.6772, 47.6174, 48.4888, 49.3304, 50.2506, 51.4996, 52.3824, 53.3078, 54.3984, 55.5838, 56.6618, 57.2174, 58.3514, 59.0802, 60.1482, 61.0376, 62.3598, 62.8078, 63.9744, 64.914, 65.781, 67.1806, 68.0594, 68.8446, 69.7928, 70.8248, 71.8324, 72.8598, 73.6246, 74.7014, 75.393, 76.6708, 77.2394,},
// precision 5
{23, 23.1194, 23.8208, 24.2318, 24.77, 25.2436, 25.7774, 26.2848, 26.8224, 27.3742, 27.9336, 28.503, 29.0494, 29.6292, 30.2124, 30.798, 31.367, 31.9728, 32.5944, 33.217, 33.8438, 34.3696, 35.0956, 35.7044, 36.324, 37.0668, 37.6698, 38.3644, 39.049, 39.6918, 40.4146, 41.082, 41.687, 42.5398, 43.2462, 43.857, 44.6606, 45.4168, 46.1248, 46.9222, 47.6804, 48.447, 49.3454, 49.9594, 50.7636, 51.5776, 52.331, 53.19, 53.9676, 54.7564, 55.5314, 56.4442, 57.3708, 57.9774, 58.9624, 59.8796, 60.755, 61.472, 62.2076, 63.1024, 63.8908, 64.7338, 65.7728, 66.629, 67.413, 68.3266, 69.1524, 70.2642, 71.1806, 72.0566, 72.9192, 73.7598, 74.3516, 75.5802, 76.4386, 77.4916, 78.1524, 79.1892, 79.8414, 80.8798, 81.8376, 82.4698, 83.7656, 84.331, 85.5914, 86.6012, 87.7016, 88.5582, 89.3394, 90.3544, 91.4912, 92.308, 93.3552, 93.9746, 95.2052, 95.727, 97.1322, 98.3944, 98.7588, 100.242, 101.1914, 102.2538, 102.8776, 103.6292, 105.1932, 105.9152, 107.0868, 107.6728, 108.7144, 110.3114, 110.8716, 111.245, 112.7908, 113.7064, 114.636, 115.7464, 116.1788, 117.7464, 118.4896, 119.6166, 120.5082, 121.7798, 122.9028, 123.4426, 124.8854, 125.705, 126.4652, 128.3464, 128.3462, 130.0398, 131.0342, 131.0042, 132.4766, 133.511, 134.7252, 135.425, 136.5172, 138.0572, 138.6694, 139.3712, 140.8598, 141.4594, 142.554, 143.4006, 144.7374, 146.1634, 146.8994, 147.605, 147.9304, 149.1636, 150.2468, 151.5876, 152.2096, 153.7032, 154.7146, 155.807, 156.9228, 157.0372, 158.5852,},
// precision 6
{46, 46.1902, 47.271, 47.8358, 48.8142, 49.2854, 50.317, 51.354, 51.8924, 52.9436, 53.4596, 54.5262, 55.6248, 56.1574, 57.2822, 57.837, 58.9636, 60.074, 60.7042, 61.7976, 62.4772, 63.6564, 64.7942, 65.5004, 66.686, 67.291, 68.5672, 69.8556, 70.4982, 71.8204, 72.4252, 73.7744, 75.0786, 75.8344, 77.0294, 77.8098, 79.0794, 80.5732, 81.1878, 82.5648, 83.2902, 84.6784, 85.3352, 86.8946, 88.3712, 89.0852, 90.499, 91.2686, 92.6844, 94.2234, 94.9732, 96.3356, 97.2286, 98.7262, 100.3284, 101.1048, 102.5962, 103.3562, 105.1272, 106.4184, 107.4974, 109.0822, 109.856, 111.48, 113.2834, 114.0208, 115.637, 116.5174, 118.0576, 119.7476, 120.427, 122.1326, 123.2372, 125.2788, 126.6776, 127.7926, 129.1952, 129.9564, 131.6454, 133.87, 134.5428, 136.2, 137.0294, 138.6278, 139.6782, 141.792, 143.3516, 144.2832, 146.0394, 147.0748, 148.4912, 150.849, 151.696, 153.5404, 154.073, 156.3714, 157.7216, 158.7328, 160.4208, 161.4184, 163.9424, 165.2772, 166.411, 168.1308, 168.769, 170.9258, 172.6828, 173.7502, 175.706, 176.3886, 179.0186, 180.4518, 181.927, 183.4172, 184.4114, 186.033, 188.5124, 189.5564, 191.6008, 192.4172, 193.8044, 194.997, 197.4548, 198.8948, 200.2346, 202.3086, 203.1548, 204.8842, 206.6508, 206.6772, 209.7254, 210.4752, 212.7228, 214.6614, 215.1676, 217.793, 218.0006, 219.9052, 221.66, 223.5588, 225.1636, 225.6882, 227.7126, 229.4502, 231.1978, 232.9756, 233.1654, 236.727, 238.1974, 237.7474, 241.1346, 242.3048, 244.1948, 245.3134, 246.879, 249.1204, 249.853, 252.6792, 253.857, 254.4486, 257.2362, 257.9534, 260.0286, 260.5632, 262.663, 264.723, 265.7566, 267.2566, 267.1624, 270.62, 272.8216, 273.2166, 275.2056, 276.2202, 278.3726, 280.3344, 281.9284, 283.9728, 284.1924, 286.4872, 287.587, 289.807, 291.1206, 292.769, 294.8708, 296.665, 297.1182, 299.4012, 300.6352, 302.1354, 304.1756, 306.1606, 307.3462, 308.5214, 309.4134, 310.8352, 313.9684, 315.837, 316.7796, 318.9858,},
// precision 7
{92, 93.4934, 94.9758, 96.4574, 97.9718, 99.4954, 101.5302, 103.0756, 104.6374, 106.1782, 107.7888, 109.9522, 111.592, 113.2532, 114.9086, 116.5938, 118.9474, 120.6796, 122.4394, 124.2176, 125.9768, 128.4214, 130.2528, 132.0102, 133.8658, 135.7278, 138.3044, 140.1316, 142.093, 144.0032, 145.9092, 148.6306, 150.5294, 152.5756, 154.6508, 156.662, 159.552, 161.3724, 163.617, 165.5754, 167.7872, 169.8444, 172.7988, 174.8606, 177.2118, 179.3566, 181.4476, 184.5882, 186.6816, 189.0824, 191.0258, 193.6048, 196.4436, 198.7274, 200.957, 203.147, 205.4364, 208.7592, 211.3386, 213.781, 215.8028, 218.656, 221.6544, 223.996, 226.4718, 229.1544, 231.6098, 234.5956, 237.0616, 239.5758, 242.4878, 244.5244, 248.2146, 250.724, 252.8722, 255.5198, 258.0414, 261.941, 264.9048, 266.87, 269.4304, 272.028, 274.4708, 278.37, 281.0624, 283.4668, 286.5532, 289.4352, 293.2564, 295.2744, 298.2118, 300.7472, 304.1456, 307.2928, 309.7504, 312.5528, 315.979, 318.2102, 322.1834, 324.3494, 327.325, 330.6614, 332.903, 337.2544, 339.9042, 343.215, 345.2864, 348.0814, 352.6764, 355.301, 357.139, 360.658, 363.1732, 366.5902, 369.9538, 373.0828, 375.922, 378.9902, 382.7328, 386.4538, 388.1136, 391.2234, 394.0878, 396.708, 401.1556, 404.1852, 406.6372, 409.6822, 412.7796, 416.6078, 418.4916, 422.131, 424.5376, 428.1988, 432.211, 434.4502, 438.5282, 440.912, 444.0448, 447.7432, 450.8524, 453.7988, 456.7858, 458.8868, 463.9886, 466.5064, 468.9124, 472.6616, 475.4682, 478.582, 481.304, 485.2738, 488.6894, 490.329, 496.106, 497.6908, 501.1374, 504.5322, 506.8848, 510.3324, 513.4512, 516.179, 520.4412, 522.6066, 526.167, 528.7794, 533.379, 536.067, 538.46, 542.9116, 545.692, 547.9546, 552.493, 555.2722, 557.335, 562.449, 564.2014, 569.0738, 571.0974, 574.8564, 578.2996, 581.409, 583.9704, 585.8098, 589.6528, 594.5998, 595.958, 600.068, 603.3278, 608.2016, 609.9632, 612.864, 615.43, 620.7794, 621.272, 625.8644, 629.206, 633.219, 634.5154, 638.6102,},
// precision 8
{184.2152, 187.2454, 190.2096, 193.6652, 196.6312, 199.6822, 203.249, 206.3296, 210.0038, 213.2074, 216.4612, 220.27, 223.5178, 227.4412, 230.8032, 234.1634, 238.1688, 241.6074, 245.6946, 249.2664, 252.8228, 257.0432, 260.6824, 264.9464, 268.6268, 272.2626, 276.8376, 280.4034, 284.8956, 288.8522, 292.7638, 297.3552, 301.3556, 305.7526, 309.9292, 313.8954, 318.8198, 322.7668, 327.298, 331.6688, 335.9466, 340.9746, 345.1672, 349.3474, 354.3028, 358.8912, 364.114, 368.4646, 372.9744, 378.4092, 382.6022, 387.843, 392.5684, 397.1652, 402.5426, 407.4152, 412.5388, 417.3592, 422.1366, 427.486, 432.3918, 437.5076, 442.509, 447.3834, 453.3498, 458.0668, 463.7346, 469.1228, 473.4528, 479.7, 484.644, 491.0518, 495.5774, 500.9068, 506.432, 512.1666, 517.434, 522.6644, 527.4894, 533.6312, 538.3804, 544.292, 550.5496, 556.0234, 562.8206, 566.6146, 572.4188, 579.117, 583.6762, 590.6576, 595.7864, 601.509, 607.5334, 612.9204, 619.772, 624.2924, 630.8654, 636.1836, 642.745, 649.1316, 655.0386, 660.0136, 666.6342, 671.6196, 678.1866, 684.4282, 689.3324, 695.4794, 702.5038, 708.129, 713.528, 720.3204, 726.463, 732.7928, 739.123, 744.7418, 751.2192, 756.5102, 762.6066, 769.0184, 775.2224, 781.4014, 787.7618, 794.1436, 798.6506, 805.6378, 811.766, 819.7514, 824.5776, 828.7322, 837.8048, 843.6302, 849.9336, 854.4798, 861.3388, 867.9894, 873.8196, 880.3136, 886.2308, 892.4588, 899.0816, 905.4076, 912.0064, 917.3878, 923.619, 929.998, 937.3482, 943.9506, 947.991, 955.1144, 962.203, 968.8222, 975.7324, 981.7826, 988.7666, 994.2648, 1000.3128, 1007.4082, 1013.7536, 1020.3376, 1026.7156, 1031.7478, 1037.4292, 1045.393, 1051.2278, 1058.3434, 1062.8726, 1071.884, 1076.806, 1082.9176, 1089.1678, 1095.5032, 1102.525, 1107.2264, 1115.315, 1120.93, 1127.252, 1134.1496, 1139.0408, 1147.5448, 1153.3296, 1158.1974, 1166.5262, 1174.3328, 1175.657, 1184.4222, 1190.9172, 1197.1292, 1204.4606, 1210.4578, 1218.8728, 1225.3336, 1226.6592, 1236.5768, 1241.363, 1249.4074, 1254.6566, 1260.8014, 1266.5454, 
1274.5192,},
// precision 9
{369, 374.8294, 381.2452, 387.6698, 394.1464, 400.2024, 406.8782, 413.6598, 420.462, 427.2826, 433.7102, 440.7416, 447.9366, 455.1046, 462.285, 469.0668, 476.306, 483.8448, 491.301, 498.9886, 506.2422, 513.8138, 521.7074, 529.7428, 537.8402, 545.1664, 553.3534, 561.594, 569.6886, 577.7876, 585.65, 594.228, 602.8036, 611.1666, 620.0818, 628.0824, 637.2574, 646.302, 655.1644, 664.0056, 672.3802, 681.7192, 690.5234, 700.2084, 708.831, 718.485, 728.1112, 737.4764, 746.76, 756.3368, 766.5538, 775.5058, 785.2646, 795.5902, 804.3818, 814.8998, 824.9532, 835.2062, 845.2798, 854.4728, 864.9582, 875.3292, 886.171, 896.781, 906.5716, 916.7048, 927.5322, 937.875, 949.3972, 958.3464, 969.7274, 980.2834, 992.1444, 1003.4264, 1013.0166, 1024.018, 1035.0438, 1046.34, 1057.6856, 1068.9836, 1079.0312, 1091.677, 1102.3188, 1113.4846, 1124.4424, 1135.739, 1147.1488, 1158.9202, 1169.406, 1181.5342, 1193.2834, 1203.8954, 1216.3286, 1226.2146, 1239.6684, 1251.9946, 1262.123, 1275.4338, 1285.7378, 1296.076, 1308.9692, 1320.4964, 1333.0998, 1343.9864, 1357.7754, 1368.3208, 1380.4838, 1392.7388, 1406.0758, 1416.9098, 1428.9728, 1440.9228, 1453.9292, 1462.617, 1476.05, 1490.2996, 1500.6128, 1513.7392, 1524.5174, 1536.6322, 1548.2584, 1562.3766, 1572.423, 1587.1232, 1596.5164, 1610.5938, 1622.5972, 1633.1222, 1647.7674, 1658.5044, 1671.57, 1683.7044, 1695.4142, 1708.7102, 1720.6094, 1732.6522, 1747.841, 1756.4072, 1769.9786, 1782.3276, 1797.5216, 1808.3186, 1819.0694, 1834.354, 1844.575, 1856.2808, 1871.1288, 1880.7852, 1893.9622, 1906.3418, 1920.6548, 1932.9302, 1945.8584, 1955.473, 1968.8248, 1980.6446, 1995.9598, 2008.349, 2019.8556, 2033.0334, 2044.0206, 2059.3956, 2069.9174, 2082.6084, 2093.7036, 2106.6108, 2118.9124, 2132.301, 2144.7628, 2159.8422, 2171.0212, 2183.101, 2193.5112, 2208.052, 2221.3194, 2233.3282, 2247.295, 2257.7222, 2273.342, 2286.5638, 2299.6786, 2310.8114, 2322.3312, 2335.516, 2349.874, 2363.5968, 2373.865, 2387.1918, 2401.8328, 2414.8496, 2424.544, 2436.7592, 
2447.1682, 2464.1958, 2474.3438, 2489.0006, 2497.4526, 2513.6586, 2527.19, 2540.7028, 2553.768,},
// precision 10
{738.1256, 750.4234, 763.1064, 775.4732, 788.4636, 801.0644, 814.488, 827.9654, 841.0832, 854.7864, 868.1992, 882.2176, 896.5228, 910.1716, 924.7752, 938.899, 953.6126, 968.6492, 982.9474, 998.5214, 1013.1064, 1028.6364, 1044.2468, 1059.4588, 1075.3832, 1091.0584, 1106.8606, 1123.3868, 1139.5062, 1156.1862, 1172.463, 1189.339, 1206.1936, 1223.1292, 1240.1854, 1257.2908, 1275.3324, 1292.8518, 1310.5204, 1328.4854, 1345.9318, 1364.552, 1381.4658, 1400.4256, 1419.849, 1438.152, 1456.8956, 1474.8792, 1494.118, 1513.62, 1532.5132, 1551.9322, 1570.7726, 1590.6086, 1610.5332, 1630.5918, 1650.4294, 1669.7662, 1690.4106, 1710.7338, 1730.9012, 1750.4486, 1770.1556, 1791.6338, 1812.7312, 1833.6264, 1853.9526, 1874.8742, 1896.8326, 1918.1966, 1939.5594, 1961.07, 1983.037, 2003.1804, 2026.071, 2047.4884, 2070.0848, 2091.2944, 2114.333, 2135.9626, 2158.2902, 2181.0814, 2202.0334, 2224.4832, 2246.39, 2269.7202, 2292.1714, 2314.2358, 2338.9346, 2360.891, 2384.0264, 2408.3834, 2430.1544, 2454.8684, 2476.9896, 2501.4368, 2522.8702, 2548.0408, 2570.6738, 2593.5208, 2617.0158, 2640.2302, 2664.0962, 2687.4986, 2714.2588, 2735.3914, 2759.6244, 2781.8378, 2808.0072, 2830.6516, 2856.2454, 2877.2136, 2903.4546, 2926.785, 2951.2294, 2976.468, 3000.867, 3023.6508, 3049.91, 3073.5984, 3098.162, 3121.5564, 3146.2328, 3170.9484, 3195.5902, 3221.3346, 3242.7032, 3271.6112, 3296.5546, 3317.7376, 3345.072, 3369.9518, 3394.326, 3418.1818, 3444.6926, 3469.086, 3494.2754, 3517.8698, 3544.248, 3565.3768, 3588.7234, 3616.979, 3643.7504, 3668.6812, 3695.72, 3719.7392, 3742.6224, 3770.4456, 3795.6602, 3819.9058, 3844.002, 3869.517, 3895.6824, 3920.8622, 3947.1364, 3973.985, 3995.4772, 4021.62, 4046.628, 4074.65, 4096.2256, 4121.831, 4146.6406, 4173.276, 4195.0744, 4223.9696, 4251.3708, 4272.9966, 4300.8046, 4326.302, 4353.1248, 4374.312, 4403.0322, 4426.819, 4450.0598, 4478.5206, 4504.8116, 4528.8928, 4553.9584, 4578.8712, 4603.8384, 4632.3872, 4655.5128, 4675.821, 4704.6222, 4731.9862, 4755.4174, 
4781.2628, 4804.332, 4832.3048, 4862.8752, 4883.4148, 4906.9544, 4935.3516, 4954.3532, 4984.0248, 5011.217, 5035.3258, 5057.3672, 5084.1828,},
// precision 11
{1477, 1501.6014, 1526.5802, 1551.7942, 1577.3042, 1603.2062, 1629.8402, 1656.2292, 1682.9462, 1709.9926, 1737.3026, 1765.4252, 1793.0578, 1821.6092, 1849.626, 1878.5568, 1908.527, 1937.5154, 1967.1874, 1997.3878, 2027.37, 2058.1972, 2089.5728, 2120.1012, 2151.9668, 2183.292, 2216.0772, 2247.8578, 2280.6562, 2313.041, 2345.714, 2380.3112, 2414.1806, 2447.9854, 2481.656, 2516.346, 2551.5154, 2586.8378, 2621.7448, 2656.6722, 2693.5722, 2729.1462, 2765.4124, 2802.8728, 2838.898, 2876.408, 2913.4926, 2951.4938, 2989.6776, 3026.282, 3065.7704, 3104.1012, 3143.7388, 3181.6876, 3221.1872, 3261.5048, 3300.0214, 3339.806, 3381.409, 3421.4144, 3461.4294, 3502.2286, 3544.651, 3586.6156, 3627.337, 3670.083, 3711.1538, 3753.5094, 3797.01, 3838.6686, 3882.1678, 3922.8116, 3967.9978, 4009.9204, 4054.3286, 4097.5706, 4140.6014, 4185.544, 4229.5976, 4274.583, 4316.9438, 4361.672, 4406.2786, 4451.8628, 4496.1834, 4543.505, 4589.1816, 4632.5188, 4678.2294, 4724.8908, 4769.0194, 4817.052, 4861.4588, 4910.1596, 4956.4344, 5002.5238, 5048.13, 5093.6374, 5142.8162, 5187.7894, 5237.3984, 5285.6078, 5331.0858, 5379.1036, 5428.6258, 5474.6018, 5522.7618, 5571.5822, 5618.59, 5667.9992, 5714.88, 5763.454, 5808.6982, 5860.3644, 5910.2914, 5953.571, 6005.9232, 6055.1914, 6104.5882, 6154.5702, 6199.7036, 6251.1764, 6298.7596, 6350.0302, 6398.061, 6448.4694, 6495.933, 6548.0474, 6597.7166, 6646.9416, 6695.9208, 6742.6328, 6793.5276, 6842.1934, 6894.2372, 6945.3864, 6996.9228, 7044.2372, 7094.1374, 7142.2272, 7192.2942, 7238.8338, 7288.9006, 7344.0908, 7394.8544, 7443.5176, 7490.4148, 7542.9314, 7595.6738, 7641.9878, 7694.3688, 7743.0448, 7797.522, 7845.53, 7899.594, 7950.3132, 7996.455, 8050.9442, 8092.9114, 8153.1374, 8197.4472, 8252.8278, 8301.8728, 8348.6776, 8401.4698, 8453.551, 8504.6598, 8553.8944, 8604.1276, 8657.6514, 8710.3062, 8758.908, 8807.8706, 8862.1702, 8910.4668, 8960.77, 9007.2766, 9063.164, 9121.0534, 9164.1354, 9218.1594, 9267.767, 9319.0594, 9372.155, 9419.7126, 9474.3722, 
9520.1338, 9572.368, 9622.7702, 9675.8448, 9726.5396, 9778.7378, 9827.6554, 9878.1922, 9928.7782, 9978.3984, 10026.578, 10076.5626, 10137.1618, 10177.5244, 10229.9176,},
// precision 12
{2954, 3003.4782, 3053.3568, 3104.3666, 3155.324, 3206.9598, 3259.648, 3312.539, 3366.1474, 3420.2576, 3474.8376, 3530.6076, 3586.451, 3643.38, 3700.4104, 3757.5638, 3815.9676, 3875.193, 3934.838, 3994.8548, 4055.018, 4117.1742, 4178.4482, 4241.1294, 4304.4776, 4367.4044, 4431.8724, 4496.3732, 4561.4304, 4627.5326, 4693.949, 4761.5532, 4828.7256, 4897.6182, 4965.5186, 5034.4528, 5104.865, 5174.7164, 5244.6828, 5316.6708, 5387.8312, 5459.9036, 5532.476, 5604.8652, 5679.6718, 5753.757, 5830.2072, 5905.2828, 5980.0434, 6056.6264, 6134.3192, 6211.5746, 6290.0816, 6367.1176, 6447.9796, 6526.5576, 6606.1858, 6686.9144, 6766.1142, 6847.0818, 6927.9664, 7010.9096, 7091.0816, 7175.3962, 7260.3454, 7344.018, 7426.4214, 7511.3106, 7596.0686, 7679.8094, 7765.818, 7852.4248, 7936.834, 8022.363, 8109.5066, 8200.4554, 8288.5832, 8373.366, 8463.4808, 8549.7682, 8642.0522, 8728.3288, 8820.9528, 8907.727, 9001.0794, 9091.2522, 9179.988, 9269.852, 9362.6394, 9453.642, 9546.9024, 9640.6616, 9732.6622, 9824.3254, 9917.7484, 10007.9392, 10106.7508, 10196.2152, 10289.8114, 10383.5494, 10482.3064, 10576.8734, 10668.7872, 10764.7156, 10862.0196, 10952.793, 11049.9748, 11146.0702, 11241.4492, 11339.2772, 11434.2336, 11530.741, 11627.6136, 11726.311, 11821.5964, 11918.837, 12015.3724, 12113.0162, 12213.0424, 12306.9804, 12408.4518, 12504.8968, 12604.586, 12700.9332, 12798.705, 12898.5142, 12997.0488, 13094.788, 13198.475, 13292.7764, 13392.9698, 13486.8574, 13590.1616, 13686.5838, 13783.6264, 13887.2638, 13992.0978, 14081.0844, 14189.9956, 14280.0912, 14382.4956, 14486.4384, 14588.1082, 14686.2392, 14782.276, 14888.0284, 14985.1864, 15088.8596, 15187.0998, 15285.027, 15383.6694, 15495.8266, 15591.3736, 15694.2008, 15790.3246, 15898.4116, 15997.4522, 16095.5014, 16198.8514, 16291.7492, 16402.6424, 16499.1266, 16606.2436, 16697.7186, 16796.3946, 16902.3376, 17005.7672, 17100.814, 17206.8282, 17305.8262, 17416.0744, 17508.4092, 17617.0178, 17715.4554, 17816.758, 17920.1748, 18012.9236, 
18119.7984, 18223.2248, 18324.2482, 18426.6276, 18525.0932, 18629.8976, 18733.2588, 18831.0466, 18940.1366, 19032.2696, 19131.729, 19243.4864, 19349.6932, 19442.866, 19547.9448, 19653.2798, 19754.4034, 19854.0692, 19965.1224, 20065.1774, 20158.2212, 20253.353, 20366.3264, 20463.22,},
// precision 13
{5908.5052, 6007.2672, 6107.347, 6208.5794, 6311.2622, 6414.5514, 6519.3376, 6625.6952, 6732.5988, 6841.3552, 6950.5972, 7061.3082, 7173.5646, 7287.109, 7401.8216, 7516.4344, 7633.3802, 7751.2962, 7870.3784, 7990.292, 8110.79, 8233.4574, 8356.6036, 8482.2712, 8607.7708, 8735.099, 8863.1858, 8993.4746, 9123.8496, 9255.6794, 9388.5448, 9522.7516, 9657.3106, 9792.6094, 9930.5642, 10068.794, 10206.7256, 10347.81, 10490.3196, 10632.0778, 10775.9916, 10920.4662, 11066.124, 11213.073, 11358.0362, 11508.1006, 11659.1716, 11808.7514, 11959.4884, 12112.1314, 12265.037, 12420.3756, 12578.933, 12734.311, 12890.0006, 13047.2144, 13207.3096, 13368.5144, 13528.024, 13689.847, 13852.7528, 14018.3168, 14180.5372, 14346.9668, 14513.5074, 14677.867, 14846.2186, 15017.4186, 15184.9716, 15356.339, 15529.2972, 15697.3578, 15871.8686, 16042.187, 16216.4094, 16389.4188, 16565.9126, 16742.3272, 16919.0042, 17094.7592, 17273.965, 17451.8342, 17634.4254, 17810.5984, 17988.9242, 18171.051, 18354.7938, 18539.466, 18721.0408, 18904.9972, 19081.867, 19271.9118, 19451.8694, 19637.9816, 19821.2922, 20013.1292, 20199.3858, 20387.8726, 20572.9514, 20770.7764, 20955.1714, 21144.751, 21329.9952, 21520.709, 21712.7016, 21906.3868, 22096.2626, 22286.0524, 22475.051, 22665.5098, 22862.8492, 23055.5294, 23249.6138, 23437.848, 23636.273, 23826.093, 24020.3296, 24213.3896, 24411.7392, 24602.9614, 24805.7952, 24998.1552, 25193.9588, 25389.0166, 25585.8392, 25780.6976, 25981.2728, 26175.977, 26376.5252, 26570.1964, 26773.387, 26962.9812, 27163.0586, 27368.164, 27565.0534, 27758.7428, 27961.1276, 28163.2324, 28362.3816, 28565.7668, 28758.644, 28956.9768, 29163.4722, 29354.7026, 29561.1186, 29767.9948, 29959.9986, 30164.0492, 30366.9818, 30562.5338, 30762.9928, 30976.1592, 31166.274, 31376.722, 31570.3734, 31770.809, 31974.8934, 32179.5286, 32387.5442, 32582.3504, 32794.076, 32989.9528, 33191.842, 33392.4684, 33595.659, 33801.8672, 34000.3414, 34200.0922, 34402.6792, 34610.0638, 34804.0084, 35011.13, 35218.669, 
35418.6634, 35619.0792, 35830.6534, 36028.4966, 36229.7902, 36438.6422, 36630.7764, 36833.3102, 37048.6728, 37247.3916, 37453.5904, 37669.3614, 37854.5526, 38059.305, 38268.0936, 38470.2516, 38674.7064, 38876.167, 39068.3794, 39281.9144, 39492.8566, 39684.8628, 39898.4108, 40093.1836, 40297.6858, 40489.7086, 40717.2424,},
// precision 14
{11817.475, 12015.0046, 12215.3792, 12417.7504, 12623.1814, 12830.0086, 13040.0072, 13252.503, 13466.178, 13683.2738, 13902.0344, 14123.9798, 14347.394, 14573.7784, 14802.6894, 15033.6824, 15266.9134, 15502.8624, 15741.4944, 15980.7956, 16223.8916, 16468.6316, 16715.733, 16965.5726, 17217.204, 17470.666, 17727.8516, 17986.7886, 18247.6902, 18510.9632, 18775.304, 19044.7486, 19314.4408, 19587.202, 19862.2576, 20135.924, 20417.0324, 20697.9788, 20979.6112, 21265.0274, 21550.723, 21841.6906, 22132.162, 22428.1406, 22722.127, 23020.5606, 23319.7394, 23620.4014, 23925.2728, 24226.9224, 24535.581, 24845.505, 25155.9618, 25470.3828, 25785.9702, 26103.7764, 26420.4132, 26742.0186, 27062.8852, 27388.415, 27714.6024, 28042.296, 28365.4494, 28701.1526, 29031.8008, 29364.2156, 29704.497, 30037.1458, 30380.111, 30723.8168, 31059.5114, 31404.9498, 31751.6752, 32095.2686, 32444.7792, 32794.767, 33145.204, 33498.4226, 33847.6502, 34209.006, 34560.849, 34919.4838, 35274.9778, 35635.1322, 35996.3266, 36359.1394, 36722.8266, 37082.8516, 37447.7354, 37815.9606, 38191.0692, 38559.4106, 38924.8112, 39294.6726, 39663.973, 40042.261, 40416.2036, 40779.2036, 41161.6436, 41540.9014, 41921.1998, 42294.7698, 42678.5264, 43061.3464, 43432.375, 43818.432, 44198.6598, 44583.0138, 44970.4794, 45353.924, 45729.858, 46118.2224, 46511.5724, 46900.7386, 47280.6964, 47668.1472, 48055.6796, 48446.9436, 48838.7146, 49217.7296, 49613.7796, 50010.7508, 50410.0208, 50793.7886, 51190.2456, 51583.1882, 51971.0796, 52376.5338, 52763.319, 53165.5534, 53556.5594, 53948.2702, 54346.352, 54748.7914, 55138.577, 55543.4824, 55941.1748, 56333.7746, 56745.1552, 57142.7944, 57545.2236, 57935.9956, 58348.5268, 58737.5474, 59158.5962, 59542.6896, 59958.8004, 60349.3788, 60755.0212, 61147.6144, 61548.194, 61946.0696, 62348.6042, 62763.603, 63162.781, 63560.635, 63974.3482, 64366.4908, 64771.5876, 65176.7346, 65597.3916, 65995.915, 66394.0384, 66822.9396, 67203.6336, 67612.2032, 68019.0078, 68420.0388, 68821.22, 
69235.8388, 69640.0724, 70055.155, 70466.357, 70863.4266, 71276.2482, 71677.0306, 72080.2006, 72493.0214, 72893.5952, 73314.5856, 73714.9852, 74125.3022, 74521.2122, 74933.6814, 75341.5904, 75743.0244, 76166.0278, 76572.1322, 76973.1028, 77381.6284, 77800.6092, 78189.328, 78607.0962, 79012.2508, 79407.8358, 79825.725, 80238.701, 80646.891, 81035.6436, 81460.0448, 81876.3884,},
// precision 15
{23635.0036, 24030.8034, 24431.4744, 24837.1524, 25246.7928, 25661.326, 26081.3532, 26505.2806, 26933.9892, 27367.7098, 27805.318, 28248.799, 28696.4382, 29148.8244, 29605.5138, 30066.8668, 30534.2344, 31006.32, 31480.778, 31962.2418, 32447.3324, 32938.0232, 33432.731, 33930.728, 34433.9896, 34944.1402, 35457.5588, 35974.5958, 36497.3296, 37021.9096, 37554.326, 38088.0826, 38628.8816, 39171.3192, 39723.2326, 40274.5554, 40832.3142, 41390.613, 41959.5908, 42532.5466, 43102.0344, 43683.5072, 44266.694, 44851.2822, 45440.7862, 46038.0586, 46640.3164, 47241.064, 47846.155, 48454.7396, 49076.9168, 49692.542, 50317.4778, 50939.65, 51572.5596, 52210.2906, 52843.7396, 53481.3996, 54127.236, 54770.406, 55422.6598, 56078.7958, 56736.7174, 57397.6784, 58064.5784, 58730.308, 59404.9784, 60077.0864, 60751.9158, 61444.1386, 62115.817, 62808.7742, 63501.4774, 64187.5454, 64883.6622, 65582.7468, 66274.5318, 66976.9276, 67688.7764, 68402.138, 69109.6274, 69822.9706, 70543.6108, 71265.5202, 71983.3848, 72708.4656, 73433.384, 74158.4664, 74896.4868, 75620.9564, 76362.1434, 77098.3204, 77835.7662, 78582.6114, 79323.9902, 80067.8658, 80814.9246, 81567.0136, 82310.8536, 83061.9952, 83821.4096, 84580.8608, 85335.547, 86092.5802, 86851.6506, 87612.311, 88381.2016, 89146.3296, 89907.8974, 90676.846, 91451.4152, 92224.5518, 92995.8686, 93763.5066, 94551.2796, 95315.1944, 96096.1806, 96881.0918, 97665.679, 98442.68, 99229.3002, 100011.0994, 100790.6386, 101580.1564, 102377.7484, 103152.1392, 103944.2712, 104730.216, 105528.6336, 106324.9398, 107117.6706, 107890.3988, 108695.2266, 109485.238, 110294.7876, 111075.0958, 111878.0496, 112695.2864, 113464.5486, 114270.0474, 115068.608, 115884.3626, 116673.2588, 117483.3716, 118275.097, 119085.4092, 119879.2808, 120687.5868, 121499.9944, 122284.916, 123095.9254, 123912.5038, 124709.0454, 125503.7182, 126323.259, 127138.9412, 127943.8294, 128755.646, 129556.5354, 130375.3298, 131161.4734, 131971.1962, 132787.5458, 133588.1056, 134431.351, 
135220.2906, 136023.398, 136846.6558, 137667.0004, 138463.663, 139283.7154, 140074.6146, 140901.3072, 141721.8548, 142543.2322, 143356.1096, 144173.7412, 144973.0948, 145794.3162, 146609.5714, 147420.003, 148237.9784, 149050.5696, 149854.761, 150663.1966, 151494.0754, 152313.1416, 153112.6902, 153935.7206, 154746.9262, 155559.547, 156401.9746, 157228.7036, 158008.7254, 158820.75, 159646.9184, 160470.4458, 161279.5348, 162093.3114, 162918.542, 163729.2842,},
// precision 16
{47271, 48062.3584, 48862.7074, 49673.152, 50492.8416, 51322.9514, 52161.03, 53009.407, 53867.6348, 54734.206, 55610.5144, 56496.2096, 57390.795, 58297.268, 59210.6448, 60134.665, 61068.0248, 62010.4472, 62962.5204, 63923.5742, 64895.0194, 65876.4182, 66862.6136, 67862.6968, 68868.8908, 69882.8544, 70911.271, 71944.0924, 72990.0326, 74040.692, 75100.6336, 76174.7826, 77252.5998, 78340.2974, 79438.2572, 80545.4976, 81657.2796, 82784.6336, 83915.515, 85059.7362, 86205.9368, 87364.4424, 88530.3358, 89707.3744, 90885.9638, 92080.197, 93275.5738, 94479.391, 95695.918, 96919.2236, 98148.4602, 99382.3474, 100625.6974, 101878.0284, 103141.6278, 104409.4588, 105686.2882, 106967.5402, 108261.6032, 109548.1578, 110852.0728, 112162.231, 113479.0072, 114806.2626, 116137.9072, 117469.5048, 118813.5186, 120165.4876, 121516.2556, 122875.766, 124250.5444, 125621.2222, 127003.2352, 128387.848, 129775.2644, 131181.7776, 132577.3086, 133979.9458, 135394.1132, 136800.9078, 138233.217, 139668.5308, 141085.212, 142535.2122, 143969.0684, 145420.2872, 146878.1542, 148332.7572, 149800.3202, 151269.66, 152743.6104, 154213.0948, 155690.288, 157169.4246, 158672.1756, 160160.059, 161650.6854, 163145.7772, 164645.6726, 166159.1952, 167682.1578, 169177.3328, 170700.0118, 172228.8964, 173732.6664, 175265.5556, 176787.799, 178317.111, 179856.6914, 181400.865, 182943.4612, 184486.742, 186033.4698, 187583.7886, 189148.1868, 190688.4526, 192250.1926, 193810.9042, 195354.2972, 196938.7682, 198493.5898, 200079.2824, 201618.912, 203205.5492, 204765.5798, 206356.1124, 207929.3064, 209498.7196, 211086.229, 212675.1324, 214256.7892, 215826.2392, 217412.8474, 218995.6724, 220618.6038, 222207.1166, 223781.0364, 225387.4332, 227005.7928, 228590.4336, 230217.8738, 231805.1054, 233408.9, 234995.3432, 236601.4956, 238190.7904, 239817.2548, 241411.2832, 243002.4066, 244640.1884, 246255.3128, 247849.3508, 249479.9734, 251106.8822, 252705.027, 254332.9242, 255935.129, 257526.9014, 259154.772, 260777.625, 262390.253, 
264004.4906, 265643.59, 267255.4076, 268873.426, 270470.7252, 272106.4804, 273722.4456, 275337.794, 276945.7038, 278592.9154, 280204.3726, 281841.1606, 283489.171, 285130.1716, 286735.3362, 288364.7164, 289961.1814, 291595.5524, 293285.683, 294899.6668, 296499.3434, 298128.0462, 299761.8946, 301394.2424, 302997.6748, 304615.1478, 306269.7724, 307886.114, 309543.1028, 311153.2862, 312782.8546, 314421.2008, 316033.2438, 317692.9636, 319305.2648, 320948.7406, 322566.3364, 324228.4224, 325847.1542,},
// precision 17
{94542, 96125.811, 97728.019, 99348.558, 100987.9705, 102646.7565, 104324.5125, 106021.7435, 107736.7865, 109469.272, 111223.9465, 112995.219, 114787.432, 116593.152, 118422.71, 120267.2345, 122134.6765, 124020.937, 125927.2705, 127851.255, 129788.9485, 131751.016, 133726.8225, 135722.592, 137736.789, 139770.568, 141821.518, 143891.343, 145982.1415, 148095.387, 150207.526, 152355.649, 154515.6415, 156696.05, 158887.7575, 161098.159, 163329.852, 165569.053, 167837.4005, 170121.6165, 172420.4595, 174732.6265, 177062.77, 179412.502, 181774.035, 184151.939, 186551.6895, 188965.691, 191402.8095, 193857.949, 196305.0775, 198774.6715, 201271.2585, 203764.78, 206299.3695, 208818.1365, 211373.115, 213946.7465, 216532.076, 219105.541, 221714.5375, 224337.5135, 226977.5125, 229613.0655, 232270.2685, 234952.2065, 237645.3555, 240331.1925, 243034.517, 245756.0725, 248517.6865, 251232.737, 254011.3955, 256785.995, 259556.44, 262368.335, 265156.911, 267965.266, 270785.583, 273616.0495, 276487.4835, 279346.639, 282202.509, 285074.3885, 287942.2855, 290856.018, 293774.0345, 296678.5145, 299603.6355, 302552.6575, 305492.9785, 308466.8605, 311392.581, 314347.538, 317319.4295, 320285.9785, 323301.7325, 326298.3235, 329301.3105, 332301.987, 335309.791, 338370.762, 341382.923, 344431.1265, 347464.1545, 350507.28, 353619.2345, 356631.2005, 359685.203, 362776.7845, 365886.488, 368958.2255, 372060.6825, 375165.4335, 378237.935, 381328.311, 384430.5225, 387576.425, 390683.242, 393839.648, 396977.8425, 400101.9805, 403271.296, 406409.8425, 409529.5485, 412678.7, 415847.423, 419020.8035, 422157.081, 425337.749, 428479.6165, 431700.902, 434893.1915, 438049.582, 441210.5415, 444379.2545, 447577.356, 450741.931, 453959.548, 457137.0935, 460329.846, 463537.4815, 466732.3345, 469960.5615, 473164.681, 476347.6345, 479496.173, 482813.1645, 486025.6995, 489249.4885, 492460.1945, 495675.8805, 498908.0075, 502131.802, 505374.3855, 508550.9915, 511806.7305, 515026.776, 518217.0005, 521523.9855, 
524705.9855, 527950.997, 531210.0265, 534472.497, 537750.7315, 540926.922, 544207.094, 547429.4345, 550666.3745, 553975.3475, 557150.7185, 560399.6165, 563662.697, 566916.7395, 570146.1215, 573447.425, 576689.6245, 579874.5745, 583202.337, 586503.0255, 589715.635, 592910.161, 596214.3885, 599488.035, 602740.92, 605983.0685, 609248.67, 612491.3605, 615787.912, 619107.5245, 622307.9555, 625577.333, 628840.4385, 632085.2155, 635317.6135, 638691.7195, 641887.467, 645139.9405, 648441.546, 651666.252, 654941.845,},
// precision 18
{189084, 192250.913, 195456.774, 198696.946, 201977.762, 205294.444, 208651.754, 212042.099, 215472.269, 218941.91, 222443.912, 225996.845, 229568.199, 233193.568, 236844.457, 240543.233, 244279.475, 248044.27, 251854.588, 255693.2, 259583.619, 263494.621, 267445.385, 271454.061, 275468.769, 279549.456, 283646.446, 287788.198, 291966.099, 296181.164, 300431.469, 304718.618, 309024.004, 313393.508, 317760.803, 322209.731, 326675.061, 331160.627, 335654.47, 340241.442, 344841.833, 349467.132, 354130.629, 358819.432, 363574.626, 368296.587, 373118.482, 377914.93, 382782.301, 387680.669, 392601.981, 397544.323, 402529.115, 407546.018, 412593.658, 417638.657, 422762.865, 427886.169, 433017.167, 438213.273, 443441.254, 448692.421, 453937.533, 459239.049, 464529.569, 469910.083, 475274.03, 480684.473, 486070.26, 491515.237, 496995.651, 502476.617, 507973.609, 513497.19, 519083.233, 524726.509, 530305.505, 535945.728, 541584.404, 547274.055, 552967.236, 558667.862, 564360.216, 570128.148, 575965.08, 581701.952, 587532.523, 593361.144, 599246.128, 605033.418, 610958.779, 616837.117, 622772.818, 628672.04, 634675.369, 640574.831, 646585.739, 652574.547, 658611.217, 664642.684, 670713.914, 676737.681, 682797.313, 688837.897, 694917.874, 701009.882, 707173.648, 713257.254, 719415.392, 725636.761, 731710.697, 737906.209, 744103.074, 750313.39, 756504.185, 762712.579, 768876.985, 775167.859, 781359, 787615.959, 793863.597, 800245.477, 806464.582, 812785.294, 819005.925, 825403.057, 831676.197, 837936.284, 844266.968, 850642.711, 856959.756, 863322.774, 869699.931, 876102.478, 882355.787, 888694.463, 895159.952, 901536.143, 907872.631, 914293.672, 920615.14, 927130.974, 933409.404, 939922.178, 946331.47, 952745.93, 959209.264, 965590.224, 972077.284, 978501.961, 984953.19, 991413.271, 997817.479, 1004222.658, 1010725.676, 1017177.138, 1023612.529, 1030098.236, 1036493.719, 1043112.207, 1049537.036, 1056008.096, 1062476.184, 1068942.337, 1075524.95, 1081932.864, 1088426.025, 
1094776.005, 1101327.448, 1107901.673, 1114423.639, 1120884.602, 1127324.923, 1133794.24, 1140328.886, 1146849.376, 1153346.682, 1159836.502, 1166478.703, 1172953.304, 1179391.502, 1185950.982, 1192544.052, 1198913.41, 1205430.994, 1212015.525, 1218674.042, 1225121.683, 1231551.101, 1238126.379, 1244673.795, 1251260.649, 1257697.86, 1264320.983, 1270736.319, 1277274.694, 1283804.95, 1290211.514, 1296858.568, 1303455.691,}
};
static final double[][] biasData = {
// precision 4
{10, 9.717, 9.207, 8.7896, 8.2882, 7.8204, 7.3772, 6.9342, 6.5202, 6.161, 5.7722, 5.4636, 5.0396, 4.6766, 4.3566, 4.0454, 3.7936, 3.4856, 3.2666, 2.9946, 2.766, 2.4692, 2.3638, 2.0764, 1.7864, 1.7602, 1.4814, 1.433, 1.2926, 1.0664, 0.999600000000001, 0.7956, 0.5366, 0.589399999999998, 0.573799999999999, 0.269799999999996, 0.368200000000002, 0.0544000000000011, 0.234200000000001, 0.0108000000000033, -0.203400000000002, -0.0701999999999998, -0.129600000000003, -0.364199999999997, -0.480600000000003, -0.226999999999997, -0.322800000000001, -0.382599999999996, -0.511200000000002, -0.669600000000003, -0.749400000000001, -0.500399999999999, -0.617600000000003, -0.6922, -0.601599999999998, -0.416200000000003, -0.338200000000001, -0.782600000000002, -0.648600000000002, -0.919800000000002, -0.851799999999997, -0.962400000000002, -0.6402, -1.1922, -1.0256, -1.086, -1.21899999999999, -0.819400000000002, -0.940600000000003, -1.1554, -1.2072, -1.1752, -1.16759999999999, -1.14019999999999, -1.3754, -1.29859999999999, -1.607, -1.3292, -1.7606,},
// precision 5
{22, 21.1194, 20.8208, 20.2318, 19.77, 19.2436, 18.7774, 18.2848, 17.8224, 17.3742, 16.9336, 16.503, 16.0494, 15.6292, 15.2124, 14.798, 14.367, 13.9728, 13.5944, 13.217, 12.8438, 12.3696, 12.0956, 11.7044, 11.324, 11.0668, 10.6698, 10.3644, 10.049, 9.6918, 9.4146, 9.082, 8.687, 8.5398, 8.2462, 7.857, 7.6606, 7.4168, 7.1248, 6.9222, 6.6804, 6.447, 6.3454, 5.9594, 5.7636, 5.5776, 5.331, 5.19, 4.9676, 4.7564, 4.5314, 4.4442, 4.3708, 3.9774, 3.9624, 3.8796, 3.755, 3.472, 3.2076, 3.1024, 2.8908, 2.7338, 2.7728, 2.629, 2.413, 2.3266, 2.1524, 2.2642, 2.1806, 2.0566, 1.9192, 1.7598, 1.3516, 1.5802, 1.43859999999999, 1.49160000000001, 1.1524, 1.1892, 0.841399999999993, 0.879800000000003, 0.837599999999995, 0.469800000000006, 0.765600000000006, 0.331000000000003, 0.591399999999993, 0.601200000000006, 0.701599999999999, 0.558199999999999, 0.339399999999998, 0.354399999999998, 0.491200000000006, 0.308000000000007, 0.355199999999996, -0.0254000000000048, 0.205200000000005, -0.272999999999996, 0.132199999999997, 0.394400000000005, -0.241200000000006, 0.242000000000004, 0.191400000000002, 0.253799999999998, -0.122399999999999, -0.370800000000003, 0.193200000000004, -0.0848000000000013, 0.0867999999999967, -0.327200000000005, -0.285600000000002, 0.311400000000006, -0.128399999999999, -0.754999999999995, -0.209199999999996, -0.293599999999998, -0.364000000000004, -0.253600000000006, -0.821200000000005, -0.253600000000006, -0.510400000000004, -0.383399999999995, -0.491799999999998, -0.220200000000006, -0.0972000000000008, -0.557400000000001, -0.114599999999996, -0.295000000000002, -0.534800000000004, 0.346399999999988, -0.65379999999999, 0.0398000000000138, 0.0341999999999985, -0.995800000000003, -0.523400000000009, -0.489000000000004, -0.274799999999999, -0.574999999999989, -0.482799999999997, 0.0571999999999946, -0.330600000000004, -0.628800000000012, -0.140199999999993, -0.540600000000012, -0.445999999999998, -0.599400000000003, -0.262599999999992, 0.163399999999996, 
-0.100599999999986, -0.39500000000001, -1.06960000000001, -0.836399999999998, -0.753199999999993, -0.412399999999991, -0.790400000000005, -0.29679999999999, -0.28540000000001, -0.193000000000012, -0.0772000000000048, -0.962799999999987, -0.414800000000014,},
// precision 6
{45, 44.1902, 43.271, 42.8358, 41.8142, 41.2854, 40.317, 39.354, 38.8924, 37.9436, 37.4596, 36.5262, 35.6248, 35.1574, 34.2822, 33.837, 32.9636, 32.074, 31.7042, 30.7976, 30.4772, 29.6564, 28.7942, 28.5004, 27.686, 27.291, 26.5672, 25.8556, 25.4982, 24.8204, 24.4252, 23.7744, 23.0786, 22.8344, 22.0294, 21.8098, 21.0794, 20.5732, 20.1878, 19.5648, 19.2902, 18.6784, 18.3352, 17.8946, 17.3712, 17.0852, 16.499, 16.2686, 15.6844, 15.2234, 14.9732, 14.3356, 14.2286, 13.7262, 13.3284, 13.1048, 12.5962, 12.3562, 12.1272, 11.4184, 11.4974, 11.0822, 10.856, 10.48, 10.2834, 10.0208, 9.637, 9.51739999999999, 9.05759999999999, 8.74760000000001, 8.42700000000001, 8.1326, 8.2372, 8.2788, 7.6776, 7.79259999999999, 7.1952, 6.9564, 6.6454, 6.87, 6.5428, 6.19999999999999, 6.02940000000001, 5.62780000000001, 5.6782, 5.792, 5.35159999999999, 5.28319999999999, 5.0394, 5.07480000000001, 4.49119999999999, 4.84899999999999, 4.696, 4.54040000000001, 4.07300000000001, 4.37139999999999, 3.7216, 3.7328, 3.42080000000001, 3.41839999999999, 3.94239999999999, 3.27719999999999, 3.411, 3.13079999999999, 2.76900000000001, 2.92580000000001, 2.68279999999999, 2.75020000000001, 2.70599999999999, 2.3886, 3.01859999999999, 2.45179999999999, 2.92699999999999, 2.41720000000001, 2.41139999999999, 2.03299999999999, 2.51240000000001, 2.5564, 2.60079999999999, 2.41720000000001, 1.80439999999999, 1.99700000000001, 2.45480000000001, 1.8948, 2.2346, 2.30860000000001, 2.15479999999999, 1.88419999999999, 1.6508, 0.677199999999999, 1.72540000000001, 1.4752, 1.72280000000001, 1.66139999999999, 1.16759999999999, 1.79300000000001, 1.00059999999999, 0.905200000000008, 0.659999999999997, 1.55879999999999, 1.1636, 0.688199999999995, 0.712600000000009, 0.450199999999995, 1.1978, 0.975599999999986, 0.165400000000005, 1.727, 1.19739999999999, -0.252600000000001, 1.13460000000001, 1.3048, 1.19479999999999, 0.313400000000001, 0.878999999999991, 1.12039999999999, 0.853000000000009, 1.67920000000001, 0.856999999999999, 
0.448599999999999, 1.2362, 0.953399999999988, 1.02859999999998, 0.563199999999995, 0.663000000000011, 0.723000000000013, 0.756599999999992, 0.256599999999992, -0.837600000000009, 0.620000000000005, 0.821599999999989, 0.216600000000028, 0.205600000000004, 0.220199999999977, 0.372599999999977, 0.334400000000016, 0.928400000000011, 0.972800000000007, 0.192400000000021, 0.487199999999973, -0.413000000000011, 0.807000000000016, 0.120600000000024, 0.769000000000005, 0.870799999999974, 0.66500000000002, 0.118200000000002, 0.401200000000017, 0.635199999999998, 0.135400000000004, 0.175599999999974, 1.16059999999999, 0.34620000000001, 0.521400000000028, -0.586599999999976, -1.16480000000001, 0.968399999999974, 0.836999999999989, 0.779600000000016, 0.985799999999983,},
// precision 7
{91, 89.4934, 87.9758, 86.4574, 84.9718, 83.4954, 81.5302, 80.0756, 78.6374, 77.1782, 75.7888, 73.9522, 72.592, 71.2532, 69.9086, 68.5938, 66.9474, 65.6796, 64.4394, 63.2176, 61.9768, 60.4214, 59.2528, 58.0102, 56.8658, 55.7278, 54.3044, 53.1316, 52.093, 51.0032, 49.9092, 48.6306, 47.5294, 46.5756, 45.6508, 44.662, 43.552, 42.3724, 41.617, 40.5754, 39.7872, 38.8444, 37.7988, 36.8606, 36.2118, 35.3566, 34.4476, 33.5882, 32.6816, 32.0824, 31.0258, 30.6048, 29.4436, 28.7274, 27.957, 27.147, 26.4364, 25.7592, 25.3386, 24.781, 23.8028, 23.656, 22.6544, 21.996, 21.4718, 21.1544, 20.6098, 19.5956, 19.0616, 18.5758, 18.4878, 17.5244, 17.2146, 16.724, 15.8722, 15.5198, 15.0414, 14.941, 14.9048, 13.87, 13.4304, 13.028, 12.4708, 12.37, 12.0624, 11.4668, 11.5532, 11.4352, 11.2564, 10.2744, 10.2118, 9.74720000000002, 10.1456, 9.2928, 8.75040000000001, 8.55279999999999, 8.97899999999998, 8.21019999999999, 8.18340000000001, 7.3494, 7.32499999999999, 7.66140000000001, 6.90300000000002, 7.25439999999998, 6.9042, 7.21499999999997, 6.28640000000001, 6.08139999999997, 6.6764, 6.30099999999999, 5.13900000000001, 5.65800000000002, 5.17320000000001, 4.59019999999998, 4.9538, 5.08280000000002, 4.92200000000003, 4.99020000000002, 4.7328, 5.4538, 4.11360000000002, 4.22340000000003, 4.08780000000002, 3.70800000000003, 4.15559999999999, 4.18520000000001, 3.63720000000001, 3.68220000000002, 3.77960000000002, 3.6078, 2.49160000000001, 3.13099999999997, 2.5376, 3.19880000000001, 3.21100000000001, 2.4502, 3.52820000000003, 2.91199999999998, 3.04480000000001, 2.7432, 2.85239999999999, 2.79880000000003, 2.78579999999999, 1.88679999999999, 2.98860000000002, 2.50639999999999, 1.91239999999999, 2.66160000000002, 2.46820000000002, 1.58199999999999, 1.30399999999997, 2.27379999999999, 2.68939999999998, 1.32900000000001, 3.10599999999999, 1.69080000000002, 2.13740000000001, 2.53219999999999, 1.88479999999998, 1.33240000000001, 1.45119999999997, 1.17899999999997, 2.44119999999998, 1.60659999999996, 
2.16700000000003, 0.77940000000001, 2.37900000000002, 2.06700000000001, 1.46000000000004, 2.91160000000002, 1.69200000000001, 0.954600000000028, 2.49300000000005, 2.2722, 1.33500000000004, 2.44899999999996, 1.20140000000004, 3.07380000000001, 2.09739999999999, 2.85640000000001, 2.29960000000005, 2.40899999999999, 1.97040000000004, 0.809799999999996, 1.65279999999996, 2.59979999999996, 0.95799999999997, 2.06799999999998, 2.32780000000002, 4.20159999999998, 1.96320000000003, 1.86400000000003, 1.42999999999995, 3.77940000000001, 1.27200000000005, 1.86440000000005, 2.20600000000002, 3.21900000000005, 1.5154, 2.61019999999996,},
// precision 8
{183.2152, 180.2454, 177.2096, 173.6652, 170.6312, 167.6822, 164.249, 161.3296, 158.0038, 155.2074, 152.4612, 149.27, 146.5178, 143.4412, 140.8032, 138.1634, 135.1688, 132.6074, 129.6946, 127.2664, 124.8228, 122.0432, 119.6824, 116.9464, 114.6268, 112.2626, 109.8376, 107.4034, 104.8956, 102.8522, 100.7638, 98.3552, 96.3556, 93.7526, 91.9292, 89.8954, 87.8198, 85.7668, 83.298, 81.6688, 79.9466, 77.9746, 76.1672, 74.3474, 72.3028, 70.8912, 69.114, 67.4646, 65.9744, 64.4092, 62.6022, 60.843, 59.5684, 58.1652, 56.5426, 55.4152, 53.5388, 52.3592, 51.1366, 49.486, 48.3918, 46.5076, 45.509, 44.3834, 43.3498, 42.0668, 40.7346, 40.1228, 38.4528, 37.7, 36.644, 36.0518, 34.5774, 33.9068, 32.432, 32.1666, 30.434, 29.6644, 28.4894, 27.6312, 26.3804, 26.292, 25.5496000000001, 25.0234, 24.8206, 22.6146, 22.4188, 22.117, 20.6762, 20.6576, 19.7864, 19.509, 18.5334, 17.9204, 17.772, 16.2924, 16.8654, 15.1836, 15.745, 15.1316, 15.0386, 14.0136, 13.6342, 12.6196, 12.1866, 12.4281999999999, 11.3324, 10.4794000000001, 11.5038, 10.129, 9.52800000000002, 10.3203999999999, 9.46299999999997, 9.79280000000006, 9.12300000000005, 8.74180000000001, 9.2192, 7.51020000000005, 7.60659999999996, 7.01840000000004, 7.22239999999999, 7.40139999999997, 6.76179999999999, 7.14359999999999, 5.65060000000005, 5.63779999999997, 5.76599999999996, 6.75139999999999, 5.57759999999996, 3.73220000000003, 5.8048, 5.63019999999995, 4.93359999999996, 3.47979999999995, 4.33879999999999, 3.98940000000005, 3.81960000000004, 3.31359999999995, 3.23080000000004, 3.4588, 3.08159999999998, 3.4076, 3.00639999999999, 2.38779999999997, 2.61900000000003, 1.99800000000005, 3.34820000000002, 2.95060000000001, 0.990999999999985, 2.11440000000005, 2.20299999999997, 2.82219999999995, 2.73239999999998, 2.7826, 3.76660000000004, 2.26480000000004, 2.31280000000004, 2.40819999999997, 2.75360000000001, 3.33759999999995, 2.71559999999999, 1.7478000000001, 1.42920000000004, 2.39300000000003, 2.22779999999989, 2.34339999999997, 
0.87259999999992, 3.88400000000001, 1.80600000000004, 1.91759999999999, 1.16779999999994, 1.50320000000011, 2.52500000000009, 0.226400000000012, 2.31500000000005, 0.930000000000064, 1.25199999999995, 2.14959999999996, 0.0407999999999902, 2.5447999999999, 1.32960000000003, 0.197400000000016, 2.52620000000002, 3.33279999999991, -1.34300000000007, 0.422199999999975, 0.917200000000093, 1.12920000000008, 1.46060000000011, 1.45779999999991, 2.8728000000001, 3.33359999999993, -1.34079999999994, 1.57680000000005, 0.363000000000056, 1.40740000000005, 0.656600000000026, 0.801400000000058, -0.454600000000028, 1.51919999999996,},
// precision 9
{368, 361.8294, 355.2452, 348.6698, 342.1464, 336.2024, 329.8782, 323.6598, 317.462, 311.2826, 305.7102, 299.7416, 293.9366, 288.1046, 282.285, 277.0668, 271.306, 265.8448, 260.301, 254.9886, 250.2422, 244.8138, 239.7074, 234.7428, 229.8402, 225.1664, 220.3534, 215.594, 210.6886, 205.7876, 201.65, 197.228, 192.8036, 188.1666, 184.0818, 180.0824, 176.2574, 172.302, 168.1644, 164.0056, 160.3802, 156.7192, 152.5234, 149.2084, 145.831, 142.485, 139.1112, 135.4764, 131.76, 129.3368, 126.5538, 122.5058, 119.2646, 116.5902, 113.3818, 110.8998, 107.9532, 105.2062, 102.2798, 99.4728, 96.9582, 94.3292, 92.171, 89.7809999999999, 87.5716, 84.7048, 82.5322, 79.875, 78.3972, 75.3464, 73.7274, 71.2834, 70.1444, 68.4263999999999, 66.0166, 64.018, 62.0437999999999, 60.3399999999999, 58.6856, 57.9836, 55.0311999999999, 54.6769999999999, 52.3188, 51.4846, 49.4423999999999, 47.739, 46.1487999999999, 44.9202, 43.4059999999999, 42.5342000000001, 41.2834, 38.8954000000001, 38.3286000000001, 36.2146, 36.6684, 35.9946, 33.123, 33.4338, 31.7378000000001, 29.076, 28.9692, 27.4964, 27.0998, 25.9864, 26.7754, 24.3208, 23.4838, 22.7388000000001, 24.0758000000001, 21.9097999999999, 20.9728, 19.9228000000001, 19.9292, 16.617, 17.05, 18.2996000000001, 15.6128000000001, 15.7392, 14.5174, 13.6322, 12.2583999999999, 13.3766000000001, 11.423, 13.1232, 9.51639999999998, 10.5938000000001, 9.59719999999993, 8.12220000000002, 9.76739999999995, 7.50440000000003, 7.56999999999994, 6.70440000000008, 6.41419999999994, 6.71019999999999, 5.60940000000005, 4.65219999999999, 6.84099999999989, 3.4072000000001, 3.97859999999991, 3.32760000000007, 5.52160000000003, 3.31860000000006, 2.06940000000009, 4.35400000000004, 1.57500000000005, 0.280799999999999, 2.12879999999996, -0.214799999999968, -0.0378000000000611, -0.658200000000079, 0.654800000000023, -0.0697999999999865, 0.858400000000074, -2.52700000000004, -2.1751999999999, -3.35539999999992, -1.04019999999991, -0.651000000000067, -2.14439999999991, 
-1.96659999999997, -3.97939999999994, -0.604400000000169, -3.08260000000018, -3.39159999999993, -5.29640000000018, -5.38920000000007, -5.08759999999984, -4.69900000000007, -5.23720000000003, -3.15779999999995, -4.97879999999986, -4.89899999999989, -7.48880000000008, -5.94799999999987, -5.68060000000014, -6.67180000000008, -4.70499999999993, -7.27779999999984, -4.6579999999999, -4.4362000000001, -4.32139999999981, -5.18859999999995, -6.66879999999992, -6.48399999999992, -5.1260000000002, -4.4032000000002, -6.13500000000022, -5.80819999999994, -4.16719999999987, -4.15039999999999, -7.45600000000013, -7.24080000000004, -9.83179999999993, -5.80420000000004, -8.6561999999999, -6.99940000000015, -10.5473999999999, -7.34139999999979, -6.80999999999995, -6.29719999999998, -6.23199999999997,},
// precision 10
{737.1256, 724.4234, 711.1064, 698.4732, 685.4636, 673.0644, 660.488, 647.9654, 636.0832, 623.7864, 612.1992, 600.2176, 588.5228, 577.1716, 565.7752, 554.899, 543.6126, 532.6492, 521.9474, 511.5214, 501.1064, 490.6364, 480.2468, 470.4588, 460.3832, 451.0584, 440.8606, 431.3868, 422.5062, 413.1862, 404.463, 395.339, 386.1936, 378.1292, 369.1854, 361.2908, 353.3324, 344.8518, 337.5204, 329.4854, 321.9318, 314.552, 306.4658, 299.4256, 292.849, 286.152, 278.8956, 271.8792, 265.118, 258.62, 252.5132, 245.9322, 239.7726, 233.6086, 227.5332, 222.5918, 216.4294, 210.7662, 205.4106, 199.7338, 194.9012, 188.4486, 183.1556, 178.6338, 173.7312, 169.6264, 163.9526, 159.8742, 155.8326, 151.1966, 147.5594, 143.07, 140.037, 134.1804, 131.071, 127.4884, 124.0848, 120.2944, 117.333, 112.9626, 110.2902, 107.0814, 103.0334, 99.4832000000001, 96.3899999999999, 93.7202000000002, 90.1714000000002, 87.2357999999999, 85.9346, 82.8910000000001, 80.0264000000002, 78.3834000000002, 75.1543999999999, 73.8683999999998, 70.9895999999999, 69.4367999999999, 64.8701999999998, 65.0408000000002, 61.6738, 59.5207999999998, 57.0158000000001, 54.2302, 53.0962, 50.4985999999999, 52.2588000000001, 47.3914, 45.6244000000002, 42.8377999999998, 43.0072, 40.6516000000001, 40.2453999999998, 35.2136, 36.4546, 33.7849999999999, 33.2294000000002, 32.4679999999998, 30.8670000000002, 28.6507999999999, 28.9099999999999, 27.5983999999999, 26.1619999999998, 24.5563999999999, 23.2328000000002, 21.9484000000002, 21.5902000000001, 21.3346000000001, 17.7031999999999, 20.6111999999998, 19.5545999999999, 15.7375999999999, 17.0720000000001, 16.9517999999998, 15.326, 13.1817999999998, 14.6925999999999, 13.0859999999998, 13.2754, 10.8697999999999, 11.248, 7.3768, 4.72339999999986, 7.97899999999981, 8.7503999999999, 7.68119999999999, 9.7199999999998, 7.73919999999998, 5.6224000000002, 7.44560000000001, 6.6601999999998, 5.9058, 4.00199999999995, 4.51699999999983, 4.68240000000014, 3.86220000000003, 5.13639999999987, 
5.98500000000013, 2.47719999999981, 2.61999999999989, 1.62800000000016, 4.65000000000009, 0.225599999999758, 0.831000000000131, -0.359400000000278, 1.27599999999984, -2.92559999999958, -0.0303999999996449, 2.37079999999969, -2.0033999999996, 0.804600000000391, 0.30199999999968, 1.1247999999996, -2.6880000000001, 0.0321999999996478, -1.18099999999959, -3.9402, -1.47940000000017, -0.188400000000001, -2.10720000000038, -2.04159999999956, -3.12880000000041, -4.16160000000036, -0.612799999999879, -3.48719999999958, -8.17900000000009, -5.37780000000021, -4.01379999999972, -5.58259999999973, -5.73719999999958, -7.66799999999967, -5.69520000000011, -1.1247999999996, -5.58520000000044, -8.04560000000038, -4.64840000000004, -11.6468000000004, -7.97519999999986, -5.78300000000036, -7.67420000000038, -10.6328000000003, -9.81720000000041,},
// precision 11
{1476, 1449.6014, 1423.5802, 1397.7942, 1372.3042, 1347.2062, 1321.8402, 1297.2292, 1272.9462, 1248.9926, 1225.3026, 1201.4252, 1178.0578, 1155.6092, 1132.626, 1110.5568, 1088.527, 1066.5154, 1045.1874, 1024.3878, 1003.37, 982.1972, 962.5728, 942.1012, 922.9668, 903.292, 884.0772, 864.8578, 846.6562, 828.041, 809.714, 792.3112, 775.1806, 757.9854, 740.656, 724.346, 707.5154, 691.8378, 675.7448, 659.6722, 645.5722, 630.1462, 614.4124, 600.8728, 585.898, 572.408, 558.4926, 544.4938, 531.6776, 517.282, 505.7704, 493.1012, 480.7388, 467.6876, 456.1872, 445.5048, 433.0214, 420.806, 411.409, 400.4144, 389.4294, 379.2286, 369.651, 360.6156, 350.337, 342.083, 332.1538, 322.5094, 315.01, 305.6686, 298.1678, 287.8116, 280.9978, 271.9204, 265.3286, 257.5706, 249.6014, 242.544, 235.5976, 229.583, 220.9438, 214.672, 208.2786, 201.8628, 195.1834, 191.505, 186.1816, 178.5188, 172.2294, 167.8908, 161.0194, 158.052, 151.4588, 148.1596, 143.4344, 138.5238, 133.13, 127.6374, 124.8162, 118.7894, 117.3984, 114.6078, 109.0858, 105.1036, 103.6258, 98.6018000000004, 95.7618000000002, 93.5821999999998, 88.5900000000001, 86.9992000000002, 82.8800000000001, 80.4539999999997, 74.6981999999998, 74.3644000000004, 73.2914000000001, 65.5709999999999, 66.9232000000002, 65.1913999999997, 62.5882000000001, 61.5702000000001, 55.7035999999998, 56.1764000000003, 52.7596000000003, 53.0302000000001, 49.0609999999997, 48.4694, 44.933, 46.0474000000004, 44.7165999999997, 41.9416000000001, 39.9207999999999, 35.6328000000003, 35.5276000000003, 33.1934000000001, 33.2371999999996, 33.3864000000003, 33.9228000000003, 30.2371999999996, 29.1373999999996, 25.2272000000003, 24.2942000000003, 19.8338000000003, 18.9005999999999, 23.0907999999999, 21.8544000000002, 19.5176000000001, 15.4147999999996, 16.9314000000004, 18.6737999999996, 12.9877999999999, 14.3688000000002, 12.0447999999997, 15.5219999999999, 12.5299999999997, 14.5940000000001, 14.3131999999996, 9.45499999999993, 12.9441999999999, 3.91139999999996, 
13.1373999999996, 5.44720000000052, 9.82779999999912, 7.87279999999919, 3.67760000000089, 5.46980000000076, 5.55099999999948, 5.65979999999945, 3.89439999999922, 3.1275999999998, 5.65140000000065, 6.3062000000009, 3.90799999999945, 1.87060000000019, 5.17020000000048, 2.46680000000015, 0.770000000000437, -3.72340000000077, 1.16400000000067, 8.05340000000069, 0.135399999999208, 2.15940000000046, 0.766999999999825, 1.0594000000001, 3.15500000000065, -0.287399999999252, 2.37219999999979, -2.86620000000039, -1.63199999999961, -2.22979999999916, -0.15519999999924, -1.46039999999994, -0.262199999999211, -2.34460000000036, -2.8078000000005, -3.22179999999935, -5.60159999999996, -8.42200000000048, -9.43740000000071, 0.161799999999857, -10.4755999999998, -10.0823999999993,},
// precision 12
{2953, 2900.4782, 2848.3568, 2796.3666, 2745.324, 2694.9598, 2644.648, 2595.539, 2546.1474, 2498.2576, 2450.8376, 2403.6076, 2357.451, 2311.38, 2266.4104, 2221.5638, 2176.9676, 2134.193, 2090.838, 2048.8548, 2007.018, 1966.1742, 1925.4482, 1885.1294, 1846.4776, 1807.4044, 1768.8724, 1731.3732, 1693.4304, 1657.5326, 1621.949, 1586.5532, 1551.7256, 1517.6182, 1483.5186, 1450.4528, 1417.865, 1385.7164, 1352.6828, 1322.6708, 1291.8312, 1260.9036, 1231.476, 1201.8652, 1173.6718, 1145.757, 1119.2072, 1092.2828, 1065.0434, 1038.6264, 1014.3192, 988.5746, 965.0816, 940.1176, 917.9796, 894.5576, 871.1858, 849.9144, 827.1142, 805.0818, 783.9664, 763.9096, 742.0816, 724.3962, 706.3454, 688.018, 667.4214, 650.3106, 633.0686, 613.8094, 597.818, 581.4248, 563.834, 547.363, 531.5066, 520.455400000001, 505.583199999999, 488.366, 476.480799999999, 459.7682, 450.0522, 434.328799999999, 423.952799999999, 408.727000000001, 399.079400000001, 387.252200000001, 373.987999999999, 360.852000000001, 351.6394, 339.642, 330.902400000001, 322.661599999999, 311.662200000001, 301.3254, 291.7484, 279.939200000001, 276.7508, 263.215200000001, 254.811400000001, 245.5494, 242.306399999999, 234.8734, 223.787200000001, 217.7156, 212.0196, 200.793, 195.9748, 189.0702, 182.449199999999, 177.2772, 170.2336, 164.741, 158.613600000001, 155.311, 147.5964, 142.837, 137.3724, 132.0162, 130.0424, 121.9804, 120.451800000001, 114.8968, 111.585999999999, 105.933199999999, 101.705, 98.5141999999996, 95.0488000000005, 89.7880000000005, 91.4750000000004, 83.7764000000006, 80.9698000000008, 72.8574000000008, 73.1615999999995, 67.5838000000003, 62.6263999999992, 63.2638000000006, 66.0977999999996, 52.0843999999997, 58.9956000000002, 47.0912000000008, 46.4956000000002, 48.4383999999991, 47.1082000000006, 43.2392, 37.2759999999998, 40.0283999999992, 35.1864000000005, 35.8595999999998, 32.0998, 28.027, 23.6694000000007, 33.8266000000003, 26.3736000000008, 27.2008000000005, 21.3245999999999, 26.4115999999995, 
23.4521999999997, 19.5013999999992, 19.8513999999996, 10.7492000000002, 18.6424000000006, 13.1265999999996, 18.2436000000016, 6.71860000000015, 3.39459999999963, 6.33759999999893, 7.76719999999841, 0.813999999998487, 3.82819999999992, 0.826199999999517, 8.07440000000133, -1.59080000000176, 5.01780000000144, 0.455399999998917, -0.24199999999837, 0.174800000000687, -9.07640000000174, -4.20160000000033, -3.77520000000004, -4.75179999999818, -5.3724000000002, -8.90680000000066, -6.10239999999976, -5.74120000000039, -9.95339999999851, -3.86339999999836, -13.7304000000004, -16.2710000000006, -7.51359999999841, -3.30679999999847, -13.1339999999982, -10.0551999999989, -6.72019999999975, -8.59660000000076, -10.9307999999983, -1.8775999999998, -4.82259999999951, -13.7788, -21.6470000000008, -10.6735999999983, -15.7799999999988,},
// precision 13
{5907.5052, 5802.2672, 5697.347, 5593.5794, 5491.2622, 5390.5514, 5290.3376, 5191.6952, 5093.5988, 4997.3552, 4902.5972, 4808.3082, 4715.5646, 4624.109, 4533.8216, 4444.4344, 4356.3802, 4269.2962, 4183.3784, 4098.292, 4014.79, 3932.4574, 3850.6036, 3771.2712, 3691.7708, 3615.099, 3538.1858, 3463.4746, 3388.8496, 3315.6794, 3244.5448, 3173.7516, 3103.3106, 3033.6094, 2966.5642, 2900.794, 2833.7256, 2769.81, 2707.3196, 2644.0778, 2583.9916, 2523.4662, 2464.124, 2406.073, 2347.0362, 2292.1006, 2238.1716, 2182.7514, 2128.4884, 2077.1314, 2025.037, 1975.3756, 1928.933, 1879.311, 1831.0006, 1783.2144, 1738.3096, 1694.5144, 1649.024, 1606.847, 1564.7528, 1525.3168, 1482.5372, 1443.9668, 1406.5074, 1365.867, 1329.2186, 1295.4186, 1257.9716, 1225.339, 1193.2972, 1156.3578, 1125.8686, 1091.187, 1061.4094, 1029.4188, 1000.9126, 972.3272, 944.004199999999, 915.7592, 889.965, 862.834200000001, 840.4254, 812.598399999999, 785.924200000001, 763.050999999999, 741.793799999999, 721.466, 699.040799999999, 677.997200000002, 649.866999999998, 634.911800000002, 609.8694, 591.981599999999, 570.2922, 557.129199999999, 538.3858, 521.872599999999, 502.951400000002, 495.776399999999, 475.171399999999, 459.751, 439.995200000001, 426.708999999999, 413.7016, 402.3868, 387.262599999998, 372.0524, 357.050999999999, 342.5098, 334.849200000001, 322.529399999999, 311.613799999999, 295.848000000002, 289.273000000001, 274.093000000001, 263.329600000001, 251.389599999999, 245.7392, 231.9614, 229.7952, 217.155200000001, 208.9588, 199.016599999999, 190.839199999999, 180.6976, 176.272799999999, 166.976999999999, 162.5252, 151.196400000001, 149.386999999999, 133.981199999998, 130.0586, 130.164000000001, 122.053400000001, 110.7428, 108.1276, 106.232400000001, 100.381600000001, 98.7668000000012, 86.6440000000002, 79.9768000000004, 82.4722000000002, 68.7026000000005, 70.1186000000016, 71.9948000000004, 58.998599999999, 59.0492000000013, 56.9818000000014, 47.5338000000011, 42.9928, 51.1591999999982, 
37.2740000000013, 42.7220000000016, 31.3734000000004, 26.8090000000011, 25.8934000000008, 26.5286000000015, 29.5442000000003, 19.3503999999994, 26.0760000000009, 17.9527999999991, 14.8419999999969, 10.4683999999979, 8.65899999999965, 9.86720000000059, 4.34139999999752, -0.907800000000861, -3.32080000000133, -0.936199999996461, -11.9916000000012, -8.87000000000262, -6.33099999999831, -11.3366000000024, -15.9207999999999, -9.34659999999712, -15.5034000000014, -19.2097999999969, -15.357799999998, -28.2235999999975, -30.6898000000001, -19.3271999999997, -25.6083999999973, -24.409599999999, -13.6385999999984, -33.4473999999973, -32.6949999999997, -28.9063999999998, -31.7483999999968, -32.2935999999972, -35.8329999999987, -47.620600000002, -39.0855999999985, -33.1434000000008, -46.1371999999974, -37.5892000000022, -46.8164000000033, -47.3142000000007, -60.2914000000019, -37.7575999999972,},
// precision 14
{11816.475, 11605.0046, 11395.3792, 11188.7504, 10984.1814, 10782.0086, 10582.0072, 10384.503, 10189.178, 9996.2738, 9806.0344, 9617.9798, 9431.394, 9248.7784, 9067.6894, 8889.6824, 8712.9134, 8538.8624, 8368.4944, 8197.7956, 8031.8916, 7866.6316, 7703.733, 7544.5726, 7386.204, 7230.666, 7077.8516, 6926.7886, 6778.6902, 6631.9632, 6487.304, 6346.7486, 6206.4408, 6070.202, 5935.2576, 5799.924, 5671.0324, 5541.9788, 5414.6112, 5290.0274, 5166.723, 5047.6906, 4929.162, 4815.1406, 4699.127, 4588.5606, 4477.7394, 4369.4014, 4264.2728, 4155.9224, 4055.581, 3955.505, 3856.9618, 3761.3828, 3666.9702, 3575.7764, 3482.4132, 3395.0186, 3305.8852, 3221.415, 3138.6024, 3056.296, 2970.4494, 2896.1526, 2816.8008, 2740.2156, 2670.497, 2594.1458, 2527.111, 2460.8168, 2387.5114, 2322.9498, 2260.6752, 2194.2686, 2133.7792, 2074.767, 2015.204, 1959.4226, 1898.6502, 1850.006, 1792.849, 1741.4838, 1687.9778, 1638.1322, 1589.3266, 1543.1394, 1496.8266, 1447.8516, 1402.7354, 1361.9606, 1327.0692, 1285.4106, 1241.8112, 1201.6726, 1161.973, 1130.261, 1094.2036, 1048.2036, 1020.6436, 990.901400000002, 961.199800000002, 924.769800000002, 899.526400000002, 872.346400000002, 834.375, 810.432000000001, 780.659800000001, 756.013800000001, 733.479399999997, 707.923999999999, 673.858, 652.222399999999, 636.572399999997, 615.738599999997, 586.696400000001, 564.147199999999, 541.679600000003, 523.943599999999, 505.714599999999, 475.729599999999, 461.779600000002, 449.750800000002, 439.020799999998, 412.7886, 400.245600000002, 383.188199999997, 362.079599999997, 357.533799999997, 334.319000000003, 327.553399999997, 308.559399999998, 291.270199999999, 279.351999999999, 271.791400000002, 252.576999999997, 247.482400000001, 236.174800000001, 218.774599999997, 220.155200000001, 208.794399999999, 201.223599999998, 182.995600000002, 185.5268, 164.547400000003, 176.5962, 150.689599999998, 157.8004, 138.378799999999, 134.021200000003, 117.614399999999, 108.194000000003, 97.0696000000025, 89.6042000000016, 
95.6030000000028, 84.7810000000027, 72.635000000002, 77.3482000000004, 59.4907999999996, 55.5875999999989, 50.7346000000034, 61.3916000000027, 50.9149999999936, 39.0384000000049, 58.9395999999979, 29.633600000001, 28.2032000000036, 26.0078000000067, 17.0387999999948, 9.22000000000116, 13.8387999999977, 8.07240000000456, 14.1549999999988, 15.3570000000036, 3.42660000000615, 6.24820000000182, -2.96940000000177, -8.79940000000352, -5.97860000000219, -14.4048000000039, -3.4143999999942, -13.0148000000045, -11.6977999999945, -25.7878000000055, -22.3185999999987, -24.409599999999, -31.9756000000052, -18.9722000000038, -22.8678000000073, -30.8972000000067, -32.3715999999986, -22.3907999999938, -43.6720000000059, -35.9038, -39.7492000000057, -54.1641999999993, -45.2749999999942, -42.2989999999991, -44.1089999999967, -64.3564000000042, -49.9551999999967, -42.6116000000038,},
// precision 15
{23634.0036, 23210.8034, 22792.4744, 22379.1524, 21969.7928, 21565.326, 21165.3532, 20770.2806, 20379.9892, 19994.7098, 19613.318, 19236.799, 18865.4382, 18498.8244, 18136.5138, 17778.8668, 17426.2344, 17079.32, 16734.778, 16397.2418, 16063.3324, 15734.0232, 15409.731, 15088.728, 14772.9896, 14464.1402, 14157.5588, 13855.5958, 13559.3296, 13264.9096, 12978.326, 12692.0826, 12413.8816, 12137.3192, 11870.2326, 11602.5554, 11340.3142, 11079.613, 10829.5908, 10583.5466, 10334.0344, 10095.5072, 9859.694, 9625.2822, 9395.7862, 9174.0586, 8957.3164, 8738.064, 8524.155, 8313.7396, 8116.9168, 7913.542, 7718.4778, 7521.65, 7335.5596, 7154.2906, 6968.7396, 6786.3996, 6613.236, 6437.406, 6270.6598, 6107.7958, 5945.7174, 5787.6784, 5635.5784, 5482.308, 5337.9784, 5190.0864, 5045.9158, 4919.1386, 4771.817, 4645.7742, 4518.4774, 4385.5454, 4262.6622, 4142.74679999999, 4015.5318, 3897.9276, 3790.7764, 3685.13800000001, 3573.6274, 3467.9706, 3368.61079999999, 3271.5202, 3170.3848, 3076.4656, 2982.38400000001, 2888.4664, 2806.4868, 2711.9564, 2634.1434, 2551.3204, 2469.7662, 2396.61139999999, 2318.9902, 2243.8658, 2171.9246, 2105.01360000001, 2028.8536, 1960.9952, 1901.4096, 1841.86079999999, 1777.54700000001, 1714.5802, 1654.65059999999, 1596.311, 1546.2016, 1492.3296, 1433.8974, 1383.84600000001, 1339.4152, 1293.5518, 1245.8686, 1193.50659999999, 1162.27959999999, 1107.19439999999, 1069.18060000001, 1035.09179999999, 999.679000000004, 957.679999999993, 925.300199999998, 888.099400000006, 848.638600000006, 818.156400000007, 796.748399999997, 752.139200000005, 725.271200000003, 692.216, 671.633600000001, 647.939799999993, 621.670599999998, 575.398799999995, 561.226599999995, 532.237999999998, 521.787599999996, 483.095799999996, 467.049599999998, 465.286399999997, 415.548599999995, 401.047399999996, 380.607999999993, 377.362599999993, 347.258799999996, 338.371599999999, 310.096999999994, 301.409199999995, 276.280799999993, 265.586800000005, 258.994399999996, 223.915999999997, 
215.925399999993, 213.503800000006, 191.045400000003, 166.718200000003, 166.259000000005, 162.941200000001, 148.829400000002, 141.645999999993, 123.535399999993, 122.329800000007, 89.473399999988, 80.1962000000058, 77.5457999999926, 59.1056000000099, 83.3509999999951, 52.2906000000075, 36.3979999999865, 40.6558000000077, 42.0003999999899, 19.6630000000005, 19.7153999999864, -8.38539999999921, -0.692799999989802, 0.854800000000978, 3.23219999999856, -3.89040000000386, -5.25880000001052, -24.9052000000083, -22.6837999999989, -26.4286000000138, -34.997000000003, -37.0216000000073, -43.430400000012, -58.2390000000014, -68.8034000000043, -56.9245999999985, -57.8583999999973, -77.3097999999882, -73.2793999999994, -81.0738000000129, -87.4530000000086, -65.0254000000132, -57.296399999992, -96.2746000000043, -103.25, -96.081600000005, -91.5542000000132, -102.465200000006, -107.688599999994, -101.458000000013, -109.715800000005,},
// precision 16
{47270, 46423.3584, 45585.7074, 44757.152, 43938.8416, 43130.9514, 42330.03, 41540.407, 40759.6348, 39988.206, 39226.5144, 38473.2096, 37729.795, 36997.268, 36272.6448, 35558.665, 34853.0248, 34157.4472, 33470.5204, 32793.5742, 32127.0194, 31469.4182, 30817.6136, 30178.6968, 29546.8908, 28922.8544, 28312.271, 27707.0924, 27114.0326, 26526.692, 25948.6336, 25383.7826, 24823.5998, 24272.2974, 23732.2572, 23201.4976, 22674.2796, 22163.6336, 21656.515, 21161.7362, 20669.9368, 20189.4424, 19717.3358, 19256.3744, 18795.9638, 18352.197, 17908.5738, 17474.391, 17052.918, 16637.2236, 16228.4602, 15823.3474, 15428.6974, 15043.0284, 14667.6278, 14297.4588, 13935.2882, 13578.5402, 13234.6032, 12882.1578, 12548.0728, 12219.231, 11898.0072, 11587.2626, 11279.9072, 10973.5048, 10678.5186, 10392.4876, 10105.2556, 9825.766, 9562.5444, 9294.2222, 9038.2352, 8784.848, 8533.2644, 8301.7776, 8058.30859999999, 7822.94579999999, 7599.11319999999, 7366.90779999999, 7161.217, 6957.53080000001, 6736.212, 6548.21220000001, 6343.06839999999, 6156.28719999999, 5975.15419999999, 5791.75719999999, 5621.32019999999, 5451.66, 5287.61040000001, 5118.09479999999, 4957.288, 4798.4246, 4662.17559999999, 4512.05900000001, 4364.68539999999, 4220.77720000001, 4082.67259999999, 3957.19519999999, 3842.15779999999, 3699.3328, 3583.01180000001, 3473.8964, 3338.66639999999, 3233.55559999999, 3117.799, 3008.111, 2909.69140000001, 2814.86499999999, 2719.46119999999, 2624.742, 2532.46979999999, 2444.7886, 2370.1868, 2272.45259999999, 2196.19260000001, 2117.90419999999, 2023.2972, 1969.76819999999, 1885.58979999999, 1833.2824, 1733.91200000001, 1682.54920000001, 1604.57980000001, 1556.11240000001, 1491.3064, 1421.71960000001, 1371.22899999999, 1322.1324, 1264.7892, 1196.23920000001, 1143.8474, 1088.67240000001, 1073.60380000001, 1023.11660000001, 959.036400000012, 927.433199999999, 906.792799999996, 853.433599999989, 841.873800000001, 791.1054, 756.899999999994, 704.343200000003, 672.495599999995, 
622.790399999998, 611.254799999995, 567.283200000005, 519.406599999988, 519.188400000014, 495.312800000014, 451.350799999986, 443.973399999988, 431.882199999993, 392.027000000002, 380.924200000009, 345.128999999986, 298.901400000002, 287.771999999997, 272.625, 247.253000000026, 222.490600000019, 223.590000000026, 196.407599999977, 176.425999999978, 134.725199999986, 132.4804, 110.445599999977, 86.7939999999944, 56.7038000000175, 64.915399999998, 38.3726000000024, 37.1606000000029, 46.170999999973, 49.1716000000015, 15.3362000000197, 6.71639999997569, -34.8185999999987, -39.4476000000141, 12.6830000000191, -12.3331999999937, -50.6565999999875, -59.9538000000175, -65.1054000000004, -70.7576000000117, -106.325200000021, -126.852200000023, -110.227599999984, -132.885999999999, -113.897200000007, -142.713800000027, -151.145399999979, -150.799200000009, -177.756200000003, -156.036399999983, -182.735199999996, -177.259399999981, -198.663600000029, -174.577600000019, -193.84580000001,},
// precision 17
{94541, 92848.811, 91174.019, 89517.558, 87879.9705, 86262.7565, 84663.5125, 83083.7435, 81521.7865, 79977.272, 78455.9465, 76950.219, 75465.432, 73994.152, 72546.71, 71115.2345, 69705.6765, 68314.937, 66944.2705, 65591.255, 64252.9485, 62938.016, 61636.8225, 60355.592, 59092.789, 57850.568, 56624.518, 55417.343, 54231.1415, 53067.387, 51903.526, 50774.649, 49657.6415, 48561.05, 47475.7575, 46410.159, 45364.852, 44327.053, 43318.4005, 42325.6165, 41348.4595, 40383.6265, 39436.77, 38509.502, 37594.035, 36695.939, 35818.6895, 34955.691, 34115.8095, 33293.949, 32465.0775, 31657.6715, 30877.2585, 30093.78, 29351.3695, 28594.1365, 27872.115, 27168.7465, 26477.076, 25774.541, 25106.5375, 24452.5135, 23815.5125, 23174.0655, 22555.2685, 21960.2065, 21376.3555, 20785.1925, 20211.517, 19657.0725, 19141.6865, 18579.737, 18081.3955, 17578.995, 17073.44, 16608.335, 16119.911, 15651.266, 15194.583, 14749.0495, 14343.4835, 13925.639, 13504.509, 13099.3885, 12691.2855, 12328.018, 11969.0345, 11596.5145, 11245.6355, 10917.6575, 10580.9785, 10277.8605, 9926.58100000001, 9605.538, 9300.42950000003, 8989.97850000003, 8728.73249999998, 8448.3235, 8175.31050000002, 7898.98700000002, 7629.79100000003, 7413.76199999999, 7149.92300000001, 6921.12650000001, 6677.1545, 6443.28000000003, 6278.23450000002, 6014.20049999998, 5791.20299999998, 5605.78450000001, 5438.48800000001, 5234.2255, 5059.6825, 4887.43349999998, 4682.935, 4496.31099999999, 4322.52250000002, 4191.42499999999, 4021.24200000003, 3900.64799999999, 3762.84250000003, 3609.98050000001, 3502.29599999997, 3363.84250000003, 3206.54849999998, 3079.70000000001, 2971.42300000001, 2867.80349999998, 2727.08100000001, 2630.74900000001, 2496.6165, 2440.902, 2356.19150000002, 2235.58199999999, 2120.54149999999, 2012.25449999998, 1933.35600000003, 1820.93099999998, 1761.54800000001, 1663.09350000002, 1578.84600000002, 1509.48149999999, 1427.3345, 1379.56150000001, 1306.68099999998, 1212.63449999999, 1084.17300000001, 1124.16450000001, 
1060.69949999999, 1007.48849999998, 941.194499999983, 879.880500000028, 836.007500000007, 782.802000000025, 748.385499999975, 647.991500000004, 626.730500000005, 570.776000000013, 484.000500000024, 513.98550000001, 418.985499999952, 386.996999999974, 370.026500000036, 355.496999999974, 356.731499999994, 255.92200000002, 259.094000000041, 205.434499999974, 165.374500000034, 197.347500000033, 95.718499999959, 67.6165000000037, 54.6970000000438, 31.7395000000251, -15.8784999999916, 8.42500000004657, -26.3754999999655, -118.425500000012, -66.6629999999423, -42.9745000000112, -107.364999999991, -189.839000000036, -162.611499999999, -164.964999999967, -189.079999999958, -223.931499999948, -235.329999999958, -269.639500000048, -249.087999999989, -206.475499999942, -283.04449999996, -290.667000000016, -304.561499999953, -336.784499999951, -380.386500000022, -283.280499999993, -364.533000000054, -389.059499999974, -364.454000000027, -415.748000000021, -417.155000000028,},
// precision 18
{189083, 185696.913, 182348.774, 179035.946, 175762.762, 172526.444, 169329.754, 166166.099, 163043.269, 159958.91, 156907.912, 153906.845, 150924.199, 147996.568, 145093.457, 142239.233, 139421.475, 136632.27, 133889.588, 131174.2, 128511.619, 125868.621, 123265.385, 120721.061, 118181.769, 115709.456, 113252.446, 110840.198, 108465.099, 106126.164, 103823.469, 101556.618, 99308.004, 97124.508, 94937.803, 92833.731, 90745.061, 88677.627, 86617.47, 84650.442, 82697.833, 80769.132, 78879.629, 77014.432, 75215.626, 73384.587, 71652.482, 69895.93, 68209.301, 66553.669, 64921.981, 63310.323, 61742.115, 60205.018, 58698.658, 57190.657, 55760.865, 54331.169, 52908.167, 51550.273, 50225.254, 48922.421, 47614.533, 46362.049, 45098.569, 43926.083, 42736.03, 41593.473, 40425.26, 39316.237, 38243.651, 37170.617, 36114.609, 35084.19, 34117.233, 33206.509, 32231.505, 31318.728, 30403.404, 29540.0550000001, 28679.236, 27825.862, 26965.216, 26179.148, 25462.08, 24645.952, 23922.523, 23198.144, 22529.128, 21762.4179999999, 21134.779, 20459.117, 19840.818, 19187.04, 18636.3689999999, 17982.831, 17439.7389999999, 16874.547, 16358.2169999999, 15835.684, 15352.914, 14823.681, 14329.313, 13816.897, 13342.874, 12880.882, 12491.648, 12021.254, 11625.392, 11293.7610000001, 10813.697, 10456.209, 10099.074, 9755.39000000001, 9393.18500000006, 9047.57900000003, 8657.98499999999, 8395.85900000005, 8033, 7736.95900000003, 7430.59699999995, 7258.47699999996, 6924.58200000005, 6691.29399999999, 6357.92500000005, 6202.05700000003, 5921.19700000004, 5628.28399999999, 5404.96799999999, 5226.71100000001, 4990.75600000005, 4799.77399999998, 4622.93099999998, 4472.478, 4171.78700000001, 3957.46299999999, 3868.95200000005, 3691.14300000004, 3474.63100000005, 3341.67200000002, 3109.14000000001, 3071.97400000005, 2796.40399999998, 2756.17799999996, 2611.46999999997, 2471.93000000005, 2382.26399999997, 2209.22400000005, 2142.28399999999, 2013.96100000001, 1911.18999999994, 1818.27099999995, 
1668.47900000005, 1519.65800000005, 1469.67599999998, 1367.13800000004, 1248.52899999998, 1181.23600000003, 1022.71900000004, 1088.20700000005, 959.03600000008, 876.095999999903, 791.183999999892, 703.337000000058, 731.949999999953, 586.86400000006, 526.024999999907, 323.004999999888, 320.448000000091, 340.672999999952, 309.638999999966, 216.601999999955, 102.922999999952, 19.2399999999907, -0.114000000059605, -32.6240000000689, -89.3179999999702, -153.497999999905, -64.2970000000205, -143.695999999996, -259.497999999905, -253.017999999924, -213.948000000091, -397.590000000084, -434.006000000052, -403.475000000093, -297.958000000101, -404.317000000039, -528.898999999976, -506.621000000043, -513.205000000075, -479.351000000024, -596.139999999898, -527.016999999993, -664.681000000099, -680.306000000099, -704.050000000047, -850.486000000034, -757.43200000003, -713.308999999892,}
};
// Current internal representation; the constructor sets it to Format.SPARSE or Format.NORMAL
Format format;
// Registers backing the 'normal' representation; left null by the constructor in sparse mode
private RegisterSet registerSet;
// Register count for the normal representation: 1 << p, or Integer.MAX_VALUE when p > 30
private final int m;
// Precision of the normal representation
private final int p;
//Sparse versions of m and p
private int sm;
private int sp;
// Value of HyperLogLog.getAlphaMM(p, m); numerator of the raw estimate in cardinality()
private final double alphaMM;
//How big the sparse set is allowed to get before we convert to 'normal'
private int sparseSetThreshold;
// Buffer of encoded hashes offered in sparse mode that are not yet merged into sparseSet
private int[] tmpSet;
// Number of valid entries currently held in tmpSet
private int tmpIndex = 0;
// Merged, de-duplicated encoded hashes of the sparse representation
private int[] sparseSet;
/**
* This constructor disables the sparse set. If the counter is likely to exceed
* the sparse set threshold, then using this constructor will help avoid the
* extra memory pressure created by maintaining the sparse set until that threshold is
* breached.
*
* @param p - the precision value for the normal set
*/
public HyperLogLogPlus(int p) {
// sp == 0 makes the delegated constructor skip the sparse representation and allocate registers directly
this(p, 0);
}
/**
* Basic constructor for creating an instance that supports sparse and normal
* representations. The values of {@code p} and
* {@code sp} define the precision of the Normal and Sparse set
* representations for the data structure. {@code p} must be a value
* between 4 and {@code sp} and {@code sp} must not be greater than 32.
*
* @param p - the precision value for the normal set
* @param sp - the precision value for the sparse set
*/
public HyperLogLogPlus(int p, int sp) {
// no pre-existing sparse set or register set: the delegated constructor creates an empty estimator
this(p, sp, null, null);
}
/**
* Constructor to support instances serialized with the legacy sparse
* encoding scheme.
*
* @param p - the precision value for the normal set
* @param sp - the precision value for the sparse set
* @param deltaByteSet - a list of varint byte arrays encoded using a delta encoding scheme
*/
public HyperLogLogPlus(int p, int sp, List<byte[]> deltaByteSet) {
    this(p, sp);
    // Every list element holds one varint-encoded delta. The element type is
    // declared explicitly so that get(i) yields a byte[] for the varint reader
    // instead of a raw Object.
    final int entryCount = deltaByteSet.size();
    sparseSet = new int[entryCount];
    // Running sum of the deltas restores the absolute encoded values.
    int previousValue = 0;
    for (int i = 0; i < entryCount; i++) {
        int nextValue = Varint.readUnsignedVarInt(deltaByteSet.get(i));
        sparseSet[i] = nextValue + previousValue;
        previousValue = sparseSet[i];
    }
}
// for constructing a sparse mode hllp
private HyperLogLogPlus(int p, int sp, int[] sparseSet) {
// a null register set lets the delegated constructor pick the sparse representation when sp > 0
this(p, sp, sparseSet, null);
}
// for constructing a normal mode hllp
private HyperLogLogPlus(int p, int sp, RegisterSet registerSet) {
// a non-null register set keeps the delegated constructor in the normal representation
this(p, sp, null, registerSet);
}
private HyperLogLogPlus(int p, int sp, int[] sparseSet, RegisterSet registerSet) {
if ((p < 4) || ((p > sp) && (sp != 0))) {
throw new IllegalArgumentException("p must be between 4 and sp (inclusive)");
}
if (sp > 32) {
throw new IllegalArgumentException("sp values greater than 32 not supported");
}
this.p = p;
m = p > 30 ? Integer.MAX_VALUE : 1 << p;
format = Format.NORMAL;
this.registerSet = registerSet;
if (registerSet == null) {
if (sp > 0) // Use sparse representation
{
format = Format.SPARSE;
this.sp = sp;
sm = sp > 30 ? Integer.MAX_VALUE : 1 << sp;
if (sparseSet == null) {
this.sparseSet = EMPTY_SPARSE;
} else {
this.sparseSet = sparseSet;
}
sparseSetThreshold = (int) (m * 0.75);
} else {
this.registerSet = new RegisterSet(m);
}
}
this.alphaMM = HyperLogLog.getAlphaMM(p, m);
}
/**
* Package-protected for testing purposes.
*/
int getM() {
// register count of the normal representation, as fixed by the constructor
return m;
}
/**
* Package-protected for testing purposes.
*/
int getSm() {
// sparse counterpart of m; only assigned by the constructor when the sparse representation is used
return sm;
}
/**
 * Compares the stored content of two estimators.
 *
 * Pending offers of sparse instances are merged first (which may switch an
 * instance to normal mode); afterwards the instances are equal when they use
 * the same format and hold the same register bits or sparse entries.
 */
@Override public boolean equals(Object obj) {
    if (!(obj instanceof HyperLogLogPlus)) {
        return false;
    }
    final HyperLogLogPlus that = (HyperLogLogPlus) obj;

    // Flush the temp lists so both sides expose their complete state.
    if (this.format == Format.SPARSE) {
        this.mergeTempList();
    }
    if (that.format == Format.SPARSE) {
        that.mergeTempList();
    }

    if (this.format != that.format) {
        return false;
    }
    return (this.format == Format.NORMAL)
            ? Arrays.equals(this.registerSet.readOnlyBits(), that.registerSet.readOnlyBits())
            : Arrays.equals(this.sparseSet, that.sparseSet);
}
/**
 * Hashes the same content that {@code equals} compares: the register bits in
 * normal mode, the merged sparse set otherwise. Pending sparse offers are
 * merged first.
 */
@Override public int hashCode() {
    if (format == Format.SPARSE) {
        mergeTempList();
    }
    // The merge above may have converted this instance, so the format is re-read here.
    return (format == Format.NORMAL)
            ? Arrays.hashCode(registerSet.readOnlyBits())
            : Arrays.hashCode(sparseSet);
}
/**
 * Offers an already hashed value to the estimator.
 *
 * In normal mode the register addressed by the first {@code p} hash bits is
 * raised to the run length of the remaining bits. In sparse mode the hash is
 * encoded and buffered in the temp set, which is merged once it is full.
 *
 * @param hashedLong the 64 bit hash of the offered element
 * @return the result of the register update in normal mode; always {@code true} in sparse mode
 */
@Override
public boolean offerHashed(long hashedLong) {
    switch (format) {
        case NORMAL: {
            // find first p bits of x
            final long idx = hashedLong >>> (64 - p);
            //Ignore the first p bits (the idx), and then find the number of leading zeros
            //Push a 1 to where the bit string would have ended if we didnt just push the idx out of the way
            //A one is always added to runLength for estimation calculation purposes
            // The sentinel is built with a long shift: the int form (1 << (p - 1)) turns negative
            // at p == 32 and its sign extension would set every high bit, forcing a run length of 1.
            final int runLength = Long.numberOfLeadingZeros((hashedLong << this.p) | (1L << (this.p - 1))) + 1;
            return registerSet.updateIfGreater((int) idx, runLength);
        }
        case SPARSE: {
            //Call the sparse encoding scheme which attempts to stuff as much helpful data into 32 bits as possible
            int k = encodeHash(hashedLong, p, sp);
            if (tmpSet == null) {
                tmpSet = new int[INITIAL_TEMP_SET_CAPACITY];
            }
            //Put the encoded data into the temp set
            tmpSet[tmpIndex++] = k;
            if (tmpIndex >= tmpSet.length) {
                mergeTempList();
            }
            return true;
        }
    }
    return false;
}
// 32 bit hashes are not supported by this estimator; only the long overload is implemented.
@Override
public boolean offerHashed(int hashedInt) {
throw new UnsupportedOperationException();
}
/**
 * Hashes the element with {@code MurmurHash.hash64} and offers the hash.
 *
 * @param o stream element
 * @return the result of {@link #offerHashed(long)} for the element's hash
 */
@Override
public boolean offer(Object o) {
    // The explicit long cast pins the call to the long overload; the int overload is unsupported.
    return offerHashed((long) MurmurHash.hash64(o));
}
/**
* Converts the mode of this estimator from 'sparse' to 'normal'.
*
* Each member of the set has its longer 'sparse precision (sp)' length idx
* truncated to length p and the associated run length is placed into a register.
* Collisions are resolved by merely taking the max.
*/
private void convertToNormal() {
    final RegisterSet registers = new RegisterSet(1 << p);
    this.registerSet = registers;

    // Replay every sparse entry into the register addressed by its truncated index;
    // updateIfGreater keeps the larger run length when several entries share a register.
    final int[] entries = sparseSet;
    for (int i = 0; i < entries.length; i++) {
        final int encoded = entries[i];
        registers.updateIfGreater(getIndex(encoded, p), decodeRunLength(encoded));
    }

    // The sparse structures are no longer needed once the registers are populated.
    format = Format.NORMAL;
    tmpSet = null;
    sparseSet = null;
}
/**
* Encode the sp length idx and, if necessary, the run length.
*
* Start with the 64 bit hash as x.
*
* Find all the bits that belong in the first sp (roughly 25) bits. (This is idx')
* Get rid of the first p (roughly 18) bits of those. (Those were idx (not prime))
*
* If all the remaining bits are zero then we are going to need to find and encode the
* full run length of leading zeros, but this only happens once in 2 ^ (sp - p) or roughly
* 2 ^ 7 times.
*
* If at least one of them is not zero, then since the run length is determined by bits
* after p and the idx' contains the first (sp - p) bits of those, then just by putting idx'
* in the encoding, we will also be giving it all the information it needs to find the run length.
*
* The relationship looks like this:
*
* ******************************************************* <- hashed length of bits
* | p bits = idx || look for leading zeros here |
* | sp bits = idx' |
* | all 0s? |
*
* If we have idx', we could theoretically scan it (as per zeroTest) when unencoding and therefore know whether
* to look for the extra run length information. However, for now we have followed the authors of
* the paper and put this information in a flag bit at the end of the encoding.
*
* Based on this flag, we know whether to adjust for the missing run length bits. We could also
* use this flag to compress all the zeros in "| all 0s? |", but saving a byte or so once in 128
* times is less than the 120 bits spent on the flag elsewhere. Of course, after compression, the losses
* are not quite so large either way.
*
* The encoding scheme now looks like:
*
* ********************************* <- smaller length of bits (half, but not to scale)
* | empty || sp bits ||F|
* | p bits || has 1 ||0|
*
*
* or if the run length was needed (ie 'all 0s?' was indeed all 0s):
*
* *********************************
* | sp bits || run len ||F|
* | p bits || 0s | |1|
*
*
* The other notable encoding feature is the inversion of the run length, which just lets the lists
* be sorted in a convenient way. (Could alternatively sort in reverse, and use descending deltas).
*
* @param x the hash bits
* @param p the 'normal' mode precision
* @param sp the 'sparse' mode precision
* @return the encoded data as an integer
*/
private static int encodeHash(long x, int p, int sp) {
    //Get the idx' (the first sp bits) by pushing the rest to the right (into oblivion >:D)
    final int idx = (int) (x >>> (64 - sp));
    //Push to the left for all the spaces you know are between the start of your bits and the left 'wall'
    //then push p bits off as well so we have just our friend "all 0s?"
    int zeroTest = 0;
    if (p < sp) {
        zeroTest = idx << ((32 - sp) + p);
    }
    if (zeroTest != 0) {
        //Just push left once. A zero will appear by default and that's the flag we want.
        return idx << 1;
    }
    //See offer
    // The sentinel bit is produced with a long shift. The previous int shift (1 << (p - 1))
    // becomes Integer.MIN_VALUE at p == 32; widened to long it sets all upper bits and
    // pins the run length to 1 no matter what the hash contains.
    final int runLength = Long.numberOfLeadingZeros((x << p) | (1L << (p - 1))) + 1;
    //Invert run length by xoring it with a bunch of 1s
    final int invrl = runLength ^ 63;
    return ( (
            (idx << 6) //push the idx left 6 times to make room to put in the run length
                    | invrl) //then merge in the run length
            << 1) //move left again to make room for the flag bit
            | 1; //merge in the flag bit (set to one because we needed the run length)
}
/**
* More or less the opposite of the encoding function but just for getting out run lengths.
*
* @param k encoded int
* @return run length
*/
private int decodeRunLength(int k) {
    final boolean runLengthStored = (k & 1) == 1; //checking the flag bit
    if (!runLengthStored) {
        //In one of the encode diagrams there is a substring of bits
        //labeled 'has 1'. This is where we find that one!
        //Shift out the empty space (31 - sp places) plus the p index bits, which
        //do not count for the run length, then add the customary one.
        final int shift = p + (31 - sp);
        return Integer.numberOfLeadingZeros(k << shift) + 1;
    }
    //Drop the flag bit, keep only the six run length bits,
    //then undo the inversion applied by the encoder.
    final int inverted = (k >>> 1) & 63;
    return inverted ^ 63;
}
/** Get the idx' from an encoding. */
private static int getSparseIndex(int k) {
    // Flagged entries carry six run length bits plus the flag below the index,
    // unflagged entries only the flag bit.
    final int bitsBelowIndex = ((k & 1) == 1) ? 7 : 1;
    return k >>> bitsBelowIndex;
}
/**
* Returns the idx from an encoding (note this is idx and not idx prime).
*
* @param k encoded data
* @param p 'normal' precision
*/
private int getIndex(int k, int p) {
    // idx' holds sp bits; dropping the lowest (sp - p) of them leaves the p bit register index
    final int surplusBits = sp - p;
    return getSparseIndex(k) >>> surplusBits;
}
/**
* Gather the cardinality estimate from this estimator.
*
* Has two procedures based on current mode. 'Normal' mode works similar to HLL but has some
* new bias corrections. 'Sparse' mode is linear counting.
*/
@Override
public long cardinality() {
    // Pending sparse offers must be merged first; the merge may convert this instance to NORMAL.
    if (format == Format.SPARSE) {
        mergeTempList();
    }
    switch (format) {
        case NORMAL: {
            final int count = registerSet.count;
            double registerSum = 0;
            double zeros = 0;
            for (int j = 0; j < count; j++) {
                int val = registerSet.get(j);
                // adds 2^-val
                registerSum += Math.scalb(1d, -val);
                if (val == 0) {
                    zeros++;
                }
            }
            final double estimate = alphaMM * (1 / registerSum);
            // Compared in double arithmetic: the int product 5 * m overflows once m
            // exceeds Integer.MAX_VALUE / 5 and would make the comparison meaningless.
            final boolean withinBiasRange = estimate <= (5.0 * m);
            double estimatePrime = estimate;
            if (withinBiasRange) {
                estimatePrime = estimate - getEstimateBias(estimate, p);
            }
            // Linear counting is only available while at least one register is still empty.
            final double H;
            if (zeros > 0) {
                H = HyperLogLog.linearCounting(count, zeros);
            } else {
                H = estimatePrime;
            }
            // when p is large the threshold is just 5*m
            if (((p <= 18) && (H < thresholdData[p - 4])) || ((p > 18) && withinBiasRange)) {
                return Math.round(H);
            } else {
                return Math.round(estimatePrime);
            }
        }
        case SPARSE:
            // Linear counting over the sm possible sparse indexes, sparseSet.length of which are used.
            return Math.round(HyperLogLog.linearCounting(sm, sm - sparseSet.length));
    }
    return 0;
}
/**
 * Looks up the bias correction for a raw estimate by averaging the bias of
 * its nearest neighbors in the empirical table for precision {@code p}.
 *
 * @param estimate the raw estimate
 * @param p the 'normal' mode precision
 * @return the bias to subtract, or 0 when {@code p} is above 18
 */
private static double getEstimateBias(double estimate, int p) {
    // above p = 18 there is no bias correction
    if (p > 18) {
        return 0;
    }
    // get nearest neighbors for this estimate and precision
    final int[] neighbors = getNearestNeighbors(calcDistances(estimate, rawEstimateData[p - 4]));
    return getBias(neighbors, p);
}
/**
 * Averages the bias table entries of precision {@code p} at the given positions.
 *
 * @param nearestNeighbors positions inside the bias table row for {@code p}
 * @param p the 'normal' mode precision
 * @return the mean of the selected bias values
 */
private static double getBias(int[] nearestNeighbors, int p) {
    final double[] biases = biasData[p - 4];
    double sum = 0.0d;
    for (int i = 0; i < nearestNeighbors.length; i++) {
        sum += biases[nearestNeighbors[i]];
    }
    return sum / nearestNeighbors.length;
}
/**
 * Picks the table positions belonging to the six smallest distances.
 *
 * The map is declared with its key and value types so that the positions can
 * be read as {@code Integer} without an unchecked conversion.
 *
 * @param distanceMap distances mapped to table positions, iterated in ascending distance order
 * @return an array of length six; slots beyond the size of the map keep the value 0
 */
private static int[] getNearestNeighbors(SortedMap<Double, Integer> distanceMap) {
    int[] nearest = new int[6];
    int i = 0;
    for (Integer index : distanceMap.values()) {
        nearest[i++] = index;
        if (i >= 6) {
            break;
        }
    }
    return nearest;
}
/**
 * Maps the squared distance between {@code estimate} and every table entry
 * to the position of that entry.
 *
 * The map is keyed by distance, so when two entries are equally far away
 * only the later position is kept.
 *
 * @param estimate the raw estimate
 * @param estimateVector the raw estimate table row to compare against
 * @return squared distances in ascending order, each mapped to a table position
 */
private static SortedMap<Double, Integer> calcDistances(double estimate, double[] estimateVector) {
    SortedMap<Double, Integer> distances = new TreeMap<Double, Integer>();
    int index = 0;
    for (double anEstimateVector : estimateVector) {
        distances.put(Math.pow(estimate - anEstimateVector, 2), index++);
    }
    return distances;
}
/**
* Batch merges the sparse set with the temporary list. Usually called when the temporary
* list fills up, but may also be needed when suddenly converting to normal or producing a
* cardinality estimate.
*
* It works very similarly to the merge part of merge sort with some key differences:
* We don't care about the kind of order the idxs appear in, only that they are in SOME order.
* This is because we only need to be sure that we detect when they are the same. So if idx: '001' appears
* first and idx: '002' appears last, that is fine as long as that behavior is the same for both lists.
*
* We do not allow duplicate entries (we are making a set after all), and collisions are resolved by run
* length. However, most of the time the run length will be the same if two idx' are the same. Only in the
* 1 in ~128 chance case of 'all 0s?' will they differ. Because the rest of the encoding is the same we can
* do comparisons without extracting the run length and because of our earlier inversion trick, the highest
* run length duplicates will appear first. So we take those and ignore any that follow with the same idx'.
*
* @param set sparse set
* @param tmp list to be merged
* @return the new sparse set
*/
private static int[] merge(int[] set, int[] tmp) {
    final int setLen = (set == null) ? 0 : set.length;
    final int tmpLen = tmp.length;
    final int[] merged = new int[setLen + tmpLen];
    int out = 0;
    int s = 0;
    int t = 0;

    while ((s < setLen) || (t < tmpLen)) {
        if (t >= tmpLen) {
            // temp list exhausted: the sparse set holds no duplicates, copy as is
            merged[out++] = set[s++];
            continue;
        }

        final int tmpVal = tmp[t];
        final int tmpKey = getSparseIndex(tmpVal);
        if (s >= setLen) {
            // sparse set exhausted: keep the first entry of each idx' run in the temp list
            merged[out++] = tmpVal;
            t = consumeDuplicates(tmp, tmpKey, t + 1);
            continue;
        }

        final int setVal = set[s];
        final int setKey = getSparseIndex(setVal);
        if (setKey < tmpKey) {
            merged[out++] = setVal;
            s++;
        } else if (setKey == tmpKey) {
            // same idx' on both sides: the smaller encoding wins
            merged[out++] = Math.min(setVal, tmpVal);
            t = consumeDuplicates(tmp, tmpKey, t + 1);
            s++;
        } else {
            merged[out++] = tmpVal;
            t = consumeDuplicates(tmp, tmpKey, t + 1);
        }
    }

    // trim the slots that were reserved for dropped duplicates
    return (out < merged.length) ? Arrays.copyOf(merged, out) : merged;
}
/**
 * Copies a list of boxed integers into a primitive array.
 *
 * The element type is declared so that {@code get(i)} can be unboxed; with a
 * raw list it is an {@code Object} and cannot be stored in an {@code int[]}.
 *
 * @param list the values to copy; must not contain {@code null}
 * @return a new array with the list's values in list order
 */
private static int[] toIntArray(List<Integer> list) {
    int[] ret = new int[list.size()];
    for (int i = 0; i < ret.length; i++) {
        ret[i] = list.get(i);
    }
    return ret;
}
/**
* Eats up the inferior duplicates from the temp list
*
* @param tmp tmp list
* @param tmpIdx the idx' we want to consume
* @param tmpi the current tmp list index
* @return the new tmp list index
*/
private static int consumeDuplicates(int[] tmp, int tmpIdx, int tmpi) {
    int cursor = tmpi;
    // advance past every following entry that still carries the same idx'
    while (cursor < tmp.length && getSparseIndex(tmp[cursor]) == tmpIdx) {
        cursor++;
    }
    return cursor;
}
/**
* Merge this HLL++ instance with another! The power of friends! This works
* very similarly to the merge with temp list function. However, in this
* case, both lists will need their own delta decoding and neither will have
* to worry about consuming duplicates.
*
* @return the new sparse set
*/
private int[] mergeEstimators(HyperLogLogPlus other) {
    final int[] theirs = other.getSparseSet();
    final int[] mine = sparseSet;
    final int[] merged = new int[mine.length + theirs.length];
    int out = 0;
    int a = 0;
    int b = 0;

    // walk both sets in step while each still has entries
    while (a < mine.length && b < theirs.length) {
        final int mineVal = mine[a];
        final int theirsVal = theirs[b];
        final int mineKey = getSparseIndex(mineVal);
        final int theirsKey = getSparseIndex(theirsVal);
        if (mineKey == theirsKey) {
            // same idx' in both sets: keep the smaller encoding, advance both
            merged[out++] = Math.min(mineVal, theirsVal);
            a++;
            b++;
        } else if (mineKey < theirsKey) {
            merged[out++] = mineVal;
            a++;
        } else {
            merged[out++] = theirsVal;
            b++;
        }
    }
    // at most one of the two sets has entries left; append them unchanged
    while (a < mine.length) {
        merged[out++] = mine[a++];
    }
    while (b < theirs.length) {
        merged[out++] = theirs[b++];
    }

    return (out < merged.length) ? Arrays.copyOf(merged, out) : merged;
}
/**
 * Reports four times the register set's size value. When no register set
 * exists yet, the size is computed by {@code RegisterSet.getSizeForCount}
 * for {@code 1 << p} registers.
 */
@Override
public int sizeof() {
    final int words = (registerSet != null)
            ? registerSet.size
            : RegisterSet.getSizeForCount(1 << p);
    return words * 4;
}
/**
 * Serializes this estimator.
 *
 * Layout: the negated VERSION as an int, then p, sp and a format marker as
 * unsigned varints (0 = normal, 1 = sparse). Normal mode continues with the
 * byte length and the register words; sparse mode continues with the entry
 * count and the delta of each entry to its predecessor.
 *
 * @return the serialized bytes
 * @throws IOException if writing to the underlying stream fails
 */
@Override
public byte[] getBytes() throws IOException {
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    final DataOutputStream out = new DataOutputStream(buffer);

    // write version flag (always negative)
    out.writeInt(-VERSION);
    Varint.writeUnsignedVarInt(p, out);
    Varint.writeUnsignedVarInt(sp, out);

    // flush pending offers; this may switch the format that is written below
    if (format == Format.SPARSE) {
        mergeTempList();
    }

    switch (format) {
        case NORMAL: {
            Varint.writeUnsignedVarInt(0, out);
            Varint.writeUnsignedVarInt(registerSet.size * 4, out);
            for (int word : registerSet.readOnlyBits()) {
                out.writeInt(word);
            }
            break;
        }
        case SPARSE: {
            Varint.writeUnsignedVarInt(1, out);
            Varint.writeUnsignedVarInt(sparseSet.length, out);
            int previous = 0;
            for (int i = 0; i < sparseSet.length; i++) {
                final int encoded = sparseSet[i];
                Varint.writeUnsignedVarInt(encoded - previous, out);
                previous = encoded;
            }
            break;
        }
    }

    out.close();
    return buffer.toByteArray();
}
// Writes the getBytes() representation to the given output; called by SerializationHolder.writeExternal.
private void writeBytes(DataOutput serializedByteStream) throws IOException {
serializedByteStream.write(getBytes());
}
/**
* Script-esque function that handles preparing to and executing merging the
* sparse set and the temp list. Set up the delta encoding, sort the temp
* list, merge the lists, blow up the temp list.
*
* The temp set grows in size at a rate proportional to the current
* size of the sparse set. The size ratio of the temp set to the sparse set
* is determined by {@link #SPARSE_SET_TEMP_SET_RATIO}. The temp set will not
* grow unless it is currently smaller by 1/2 of the target size.
*/
void mergeTempList() {
    if (tmpIndex <= 0) {
        // nothing buffered
        return;
    }

    // sort the valid part of the temp set, fold it into the sparse set, then reset the buffer
    sparseSet = merge(sparseSet, sortEncodedSet(tmpSet, tmpIndex));
    tmpIndex = 0;

    if (sparseSet.length > sparseSetThreshold) {
        convertToNormal();
        return;
    }

    // grow the temp set only when it is below half of its target capacity
    final int targetCapacity = sparseSet.length / SPARSE_SET_TEMP_SET_RATIO;
    if ((tmpSet.length * 2) < targetCapacity) {
        tmpSet = new int[targetCapacity];
    }
}
/**
 * Maps an encoded entry to the int that {@code sortEncodedSet} sorts on.
 *
 * Entries with flag bit 0 are shifted left by six and xor-ed with
 * 0x8000007F; entries with flag bit 1 are xor-ed with 0x80000001.
 */
private static int transformToSortRepresentation(int x) {
    final boolean flagged = (x & 1) != 0;
    return flagged ? (x ^ 0x80000001) : ((x << 6) ^ 0x8000007F);
}
/**
 * Maps a sort representation back to an encoded entry.
 *
 * Odd values are xor-ed with 0x80000040 and shifted right by six (unsigned);
 * even values are xor-ed with 0x80000001.
 */
private static int transformFromSortRepresentation(int x) {
    final boolean odd = (x & 1) == 1;
    return odd ? ((x ^ 0x80000040) >>> 6) : (x ^ 0x80000001);
}
/**
 * Returns a sorted copy of the first {@code validIndex} entries of an encoded set.
 *
 * The entries are converted to their sort representation, sorted as plain
 * ints and converted back.
 *
 * @param encodedSet the encoded entries, possibly with unused trailing slots
 * @param validIndex number of leading entries to sort
 * @return a new array of length {@code validIndex}
 */
int[] sortEncodedSet(int[] encodedSet, int validIndex) {
    final int[] keys = new int[validIndex];
    int pos = 0;
    while (pos < validIndex) {
        keys[pos] = transformToSortRepresentation(encodedSet[pos]);
        pos++;
    }

    Arrays.sort(keys);

    pos = 0;
    while (pos < validIndex) {
        keys[pos] = transformFromSortRepresentation(keys[pos]);
        pos++;
    }
    return keys;
}
/**
* Add all the elements of the other set to this set.
*
* If possible, the sparse mode is protected. A switch to the normal mode
* is triggered only if the resulting set exceeds the threshold.
*
* This operation does not imply a loss of precision.
*
* @param other A compatible Hyperloglog++ instance (same p and sp)
* @throws CardinalityMergeException if other is not compatible
*/
public void addAll(HyperLogLogPlus other) throws HyperLogLogPlusMergeException {
    if (other.sizeof() != sizeof()) {
        throw new HyperLogLogPlusMergeException("Cannot merge estimators of different sizes");
    }

    // Flush pending offers on both sides; either flush may convert its instance to NORMAL.
    if (format == Format.SPARSE) {
        mergeTempList();
    }
    if (other.format == Format.SPARSE) {
        other.mergeTempList();
    }

    if (format == Format.NORMAL) {
        if (other.format == Format.NORMAL) {
            registerSet.merge(other.registerSet);
            return;
        }
        if (other.format == Format.SPARSE) {
            // Iterating over other's sparse set and updating only required indexes
            // of this' register set is several orders of magnitude faster than copying
            // and converting other to normal mode. This use case is quite common since
            // we tend to aggregate small sets to large sets.
            final int[] entries = other.sparseSet;
            for (int encoded : entries) {
                registerSet.updateIfGreater(other.getIndex(encoded, p), other.decodeRunLength(encoded));
            }
            return;
        }
    } else if (format == Format.SPARSE) {
        if (other.format == Format.SPARSE) {
            sparseSet = mergeEstimators(other);
            if (sparseSet.length > sparseSetThreshold) {
                convertToNormal();
            }
            return;
        }
        if (other.format == Format.NORMAL) {
            // this side gives up its sparse form so the registers can be merged
            convertToNormal();
            registerSet.merge(other.registerSet);
            return;
        }
    }

    throw new IllegalStateException("Unhandled HLL++ merge combination");
}
/**
* Merge this HLL++ with a bunch of others! The power of minions!
*
* Most of the logic consists of case analysis about the state of this HLL++ and each one it wants to merge
* with. If either of them is 'normal' mode then the other converts to 'normal' as well. A touching sacrifice.
* 'Normal's combine just like regular HLL estimators do.
*
* If they happen to be both sparse, then it checks if their combined size would be too large and if so, they get
* relegated to normal mode anyway. Otherwise, the mergeEstimators function is called, and a new sparse HLL++ is born.
*
* @param estimators the estimators to merge with this one
* @return a new estimator with their combined knowledge
* @throws CardinalityMergeException
*/
@Override
public ICardinality merge(ICardinality... estimators) throws CardinalityMergeException {
    // Start from a fresh estimator with this instance's precisions and fold everything into it.
    final HyperLogLogPlus result = new HyperLogLogPlus(p, sp);
    result.addAll(this);

    if (estimators != null) {
        for (int i = 0; i < estimators.length; i++) {
            final ICardinality candidate = estimators[i];
            if (candidate instanceof HyperLogLogPlus) {
                result.addAll((HyperLogLogPlus) candidate);
            } else {
                throw new HyperLogLogPlusMergeException("Cannot merge estimators of different class");
            }
        }
    }
    return result;
}
// Package-private accessor for the register set; returns the field itself, not a copy.
RegisterSet getRegisterSet() {
return registerSet;
}
// Package-private accessor for the sparse set; returns the field itself (null after convertToNormal()).
int[] getSparseSet() {
return sparseSet;
}
/**
 * Creates {@link HyperLogLogPlus} instances, either empty ones with fixed
 * precisions or ones restored from their serialized form.
 *
 * Two wire formats are understood. The current format starts with a negative
 * int (the version field) followed by varint-encoded fields. The legacy
 * format has no version field and starts directly with {@code p} as an int.
 */
public static class Builder implements IBuilder, Serializable {
    private static final long serialVersionUID = -14775219914128662L;
    private final int p;
    private final int sp;

    /**
     * @param p the precision value for the normal set
     * @param sp the precision value for the sparse set
     */
    public Builder(int p, int sp) {
        this.p = p;
        this.sp = sp;
    }

    /** Builds an empty estimator with this builder's precisions. */
    @Override
    public HyperLogLogPlus build() {
        return new HyperLogLogPlus(p, sp);
    }

    /** Returns {@code RegisterSet.getBits(1 << p) * 5}. */
    @Override
    public int sizeof() {
        int k = 1 << p;
        return RegisterSet.getBits(k) * 5;
    }

    /**
     * Restores an estimator from a byte array in the current or the legacy format.
     *
     * @param bytes the serialized estimator
     * @return the restored estimator
     * @throws IOException if the bytes cannot be read completely
     */
    public static HyperLogLogPlus build(byte[] bytes) throws IOException {
        DataInput oi = new DataInputStream(new ByteArrayInputStream(bytes));
        return build(oi);
    }

    /**
     * Restores an estimator from a data input in the current or the legacy format.
     *
     * @param oi the input positioned at the start of a serialized estimator
     * @return the restored estimator
     * @throws IOException if the input cannot be read completely
     */
    public static HyperLogLogPlus build(DataInput oi) throws IOException {
        int firstInt = oi.readInt();
        // the new encoding scheme includes a version field
        // that is always negative. If the version field
        // is not present then we'll use the legacy
        // decoding method
        if (firstInt < 0) {
            return decodeBytes(oi);
        }
        // The legacy codec has no version field, so the int consumed above is
        // already p. It is handed on instead of being read a second time, which
        // would shift every following field by one position.
        return legacyDecode(firstInt, oi);
    }

    /** Decodes a legacy payload from its very first int (which is {@code p}). */
    private static HyperLogLogPlus legacyDecode(DataInput oi) throws IOException {
        return legacyDecode(oi.readInt(), oi);
    }

    /**
     * Decodes the remainder of a legacy payload.
     *
     * @param p the already consumed precision value
     * @param oi the input positioned right after {@code p}
     */
    private static HyperLogLogPlus legacyDecode(int p, DataInput oi) throws IOException {
        int sp = oi.readInt();
        int formatType = oi.readInt();
        if (formatType == 0) {
            int size = oi.readInt();
            byte[] longArrayBytes = new byte[size];
            oi.readFully(longArrayBytes);
            RegisterSet registerSetFromBytes = new RegisterSet(1 << p, Bits.getBits(longArrayBytes));
            HyperLogLogPlus hyperLogLogPlus = new HyperLogLogPlus(p, sp, registerSetFromBytes);
            hyperLogLogPlus.format = Format.NORMAL;
            return hyperLogLogPlus;
        } else {
            // length-prefixed varint chunks, terminated by a non-positive length
            int l;
            List<byte[]> deltaByteSet = new ArrayList<byte[]>();
            while ((l = oi.readInt()) > 0) {
                byte[] longArrayBytes = new byte[l];
                oi.readFully(longArrayBytes, 0, l);
                deltaByteSet.add(longArrayBytes);
            }
            HyperLogLogPlus hyperLogLogPlus = new HyperLogLogPlus(p, sp, deltaByteSet);
            hyperLogLogPlus.format = Format.SPARSE;
            return hyperLogLogPlus;
        }
    }

    /** Decodes the current format; the version int has already been consumed. */
    private static HyperLogLogPlus decodeBytes(DataInput oi) throws IOException {
        int p = Varint.readUnsignedVarInt(oi);
        int sp = Varint.readUnsignedVarInt(oi);
        int formatType = Varint.readUnsignedVarInt(oi);
        if (formatType == 0) {
            int size = Varint.readUnsignedVarInt(oi);
            byte[] longArrayBytes = new byte[size];
            oi.readFully(longArrayBytes);
            HyperLogLogPlus hyperLogLogPlus = new HyperLogLogPlus(p, sp, new RegisterSet(1 << p, Bits.getBits(longArrayBytes)));
            hyperLogLogPlus.format = Format.NORMAL;
            return hyperLogLogPlus;
        } else {
            // entry count followed by the delta of each entry to its predecessor
            int[] rehydratedSparseSet = new int[Varint.readUnsignedVarInt(oi)];
            int prevDeltaRead = 0;
            for (int i = 0; i < rehydratedSparseSet.length; i++) {
                int nextVal = Varint.readUnsignedVarInt(oi) + prevDeltaRead;
                rehydratedSparseSet[i] = nextVal;
                prevDeltaRead = nextVal;
            }
            HyperLogLogPlus hyperLogLogPlus = new HyperLogLogPlus(p, sp, rehydratedSparseSet);
            hyperLogLogPlus.format = Format.SPARSE;
            return hyperLogLogPlus;
        }
    }
}
/**
 * Thrown by {@code addAll} and {@code merge} when an estimator cannot be
 * merged, i.e. when the sizes differ or the other estimator is not a
 * HyperLogLogPlus.
 */
@SuppressWarnings("serial")
static class HyperLogLogPlusMergeException extends CardinalityMergeException {
/**
 * @param message description of why the merge was rejected
 */
public HyperLogLogPlusMergeException(String message) {
super(message);
}
}
// Serialization hook: substitutes a SerializationHolder wrapping this instance for the object itself.
private Object writeReplace() {
return new SerializationHolder(this);
}
/**
* This class exists to support Externalizable semantics for
* HyperLogLog objects without having to expose a public
* constructor, public write/read methods, or pretend final
* fields aren't final.
*
* In short, Externalizable allows you to skip some of the more
* verbose meta-data default Serializable gets you, but still
* includes the class name. In that sense, there is some cost
* to this holder object because it has a longer class name. I
* imagine people who care about optimizing for that have their
* own work-around for long class names in general, or just use
* a custom serialization framework. Therefore we make no attempt
* to optimize that here (eg. by raising this from an inner class
* and giving it an unhelpful name).
*/
private static class SerializationHolder implements Externalizable {
// The estimator being written, or the one rebuilt by readExternal
HyperLogLogPlus hyperLogLogHolder;
// Used by writeReplace() to wrap the instance that is about to be serialized
public SerializationHolder(HyperLogLogPlus hyperLogLogHolder) {
this.hyperLogLogHolder = hyperLogLogHolder;
}
/** required for Externalizable */
public SerializationHolder() {}
// Delegates to the estimator's own byte representation
@Override
public void writeExternal(ObjectOutput out) throws IOException {
hyperLogLogHolder.writeBytes(out);
}
// Rebuilds the estimator through Builder.build(DataInput)
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
hyperLogLogHolder = Builder.build(in);
}
// Deserialization hook: hands back the rebuilt estimator instead of this holder
private Object readResolve() {
return hyperLogLogHolder;
}
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/cardinality/ICardinality.java 0000664 0000000 0000000 00000004443 13531322035 0032322 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.cardinality;
import java.io.IOException;
/**
 * Contract for a cardinality estimator: stream elements are offered one at a
 * time and {@link #cardinality()} reports the number of distinct elements
 * seen, or an estimate of it.
 */
public interface ICardinality {
/**
* Offers a stream element to the estimator.
*
* @param o stream element
* @return false if the value returned by cardinality() is unaffected by the appearance of o in the stream.
*/
boolean offer(Object o);
/**
* Offer the value as a hashed long value
*
* @param hashedLong - the hash of the item to offer to the estimator
* @return false if the value returned by cardinality() is unaffected by the appearance of hashedLong in the stream
*/
boolean offerHashed(long hashedLong);
/**
* Offer the value as a hashed int value
*
* @param hashedInt - the hash of the item to offer to the estimator
* @return false if the value returned by cardinality() is unaffected by the appearance of hashedInt in the stream
*/
boolean offerHashed(int hashedInt);
/**
* @return the number of unique elements in the stream or an estimate thereof
*/
long cardinality();
/**
* @return size in bytes needed for serialization
*/
int sizeof();
/**
* @return a byte array representation of this estimator
* @throws IOException if the representation cannot be produced
*/
byte[] getBytes() throws IOException;
/**
* Merges estimators to produce a new estimator for the combined streams
* of this estimator and those passed as arguments.
*
* Neither this estimator nor the ones passed as parameters are modified.
*
* @param estimators Zero or more compatible estimators
* @return a new estimator for the combined streams
* @throws CardinalityMergeException If at least one of the estimators is not compatible with this one
*/
ICardinality merge(ICardinality... estimators) throws CardinalityMergeException;
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/cardinality/LinearCounting.java 0000664 0000000 0000000 00000026661 13531322035 0032675 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.cardinality;
import java.io.Serializable;
import java.util.Arrays;
import com.clearspring.analytics.hash.MurmurHash;
import com.clearspring.analytics.util.IBuilder;
import static java.lang.Math.exp;
import static java.lang.Math.max;
import static java.lang.Math.pow;
/**
* See A Linear-Time Probabilistic Counting Algorithm for Database Applications
* by Whang, Vander-Zanden, Taylor
*/
public class LinearCounting implements ICardinality {

    /**
     * Bitmap
     * Hashed stream elements are mapped to bits in this array
     */
    protected byte[] map;

    /**
     * Size of the map in bits
     */
    protected final int length;

    /**
     * Number of bits left unset in the map
     */
    protected int count;

    /**
     * Creates an empty estimator (every bit unset).
     *
     * @param size of bit array in bytes
     */
    public LinearCounting(int size) {
        this.length = 8 * size;
        this.count = this.length;
        map = new byte[size];
    }

    /**
     * Wraps an existing bitmap. The array is used directly, not copied, and the
     * number of unset bits is recomputed from its contents.
     *
     * @param map bitmap to use as the backing store of this estimator
     */
    public LinearCounting(byte[] map) {
        this.map = map;
        this.length = 8 * map.length;
        this.count = computeCount();
    }

    /**
     * Estimates the cardinality as {@code length * ln(length / count)}, rounded to the
     * nearest long. When no bit is left unset ({@link #isSaturated()}), the expression
     * evaluates to positive infinity, which rounds to {@link Long#MAX_VALUE}.
     */
    @Override
    public long cardinality() {
        return Math.round(length * Math.log(length / ((double) count)));
    }

    /**
     * @return the backing bitmap itself (not a copy)
     */
    @Override
    public byte[] getBytes() {
        return map;
    }

    /**
     * Not supported by this estimator.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean offerHashed(long hashedLong) {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported by this estimator.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean offerHashed(int hashedInt) {
        throw new UnsupportedOperationException();
    }

    /**
     * Hashes {@code o} with {@link MurmurHash} and sets the corresponding bit of the map.
     *
     * @return true if the bit was previously unset (i.e. the estimate changed)
     */
    @Override
    public boolean offer(Object o) {
        boolean modified = false;

        long hash = (long) MurmurHash.hash(o);
        // Treat the 32-bit hash as unsigned so the modulo never yields a negative bit index.
        int bit = (int) ((hash & 0xFFFFFFFFL) % (long) length);
        int i = bit / 8;
        byte b = map[i];
        byte mask = (byte) (1 << (bit % 8));
        if ((mask & b) == 0) {
            map[i] = (byte) (b | mask);
            count--;
            modified = true;
        }

        return modified;
    }

    /**
     * @return size of the bitmap in bytes
     */
    @Override
    public int sizeof() {
        return map.length;
    }

    /**
     * Counts the unset bits of the map from scratch.
     *
     * @return number of bits of the map that are zero
     */
    protected int computeCount() {
        int c = 0;
        for (byte b : map) {
            c += Integer.bitCount(b & 0xFF);
        }

        return length - c;
    }

    /**
     * @return (# set bits) / (total # of bits)
     */
    public double getUtilization() {
        return (length - count) / (double) length;
    }

    /**
     * @return number of bits left unset in the map
     */
    public int getCount() {
        return count;
    }

    /**
     * @return true when every bit of the map is set
     */
    public boolean isSaturated() {
        return (count == 0);
    }

    /**
     * For debug purposes
     *
     * @return the map rendered as a string of '0'/'1' characters, exactly 8 per byte
     */
    protected String mapAsBitString() {
        StringBuilder sb = new StringBuilder(map.length * 8);
        for (byte b : map) {
            // Mask to the low 8 bits: a negative byte would otherwise be sign-extended
            // to an int and rendered as 32 digits instead of 8.
            String bits = Integer.toBinaryString(b & 0xFF);
            for (int i = bits.length(); i < 8; i++) {
                sb.append('0');
            }
            sb.append(bits);
        }
        return sb.toString();
    }

    /**
     * Merges this estimator with the given ones into a new estimator. Neither this
     * estimator nor the arguments are modified, and the result never shares its bitmap
     * with them.
     *
     * @param estimators zero or more estimators to merge with this one
     * @return a new estimator; an independent copy of this one if estimators is null or no arguments are passed
     * @throws LinearCountingMergeException if estimators are not mergeable (all estimators must be instances of LinearCounting of the same size)
     */
    @Override
    public ICardinality merge(ICardinality... estimators) throws LinearCountingMergeException {
        if (estimators == null) {
            // Copy the bitmap: sharing it would let offers on the result change this
            // estimator's bits without updating its count of unset bits.
            return new LinearCounting(Arrays.copyOf(map, map.length));
        }

        LinearCounting[] lcs = new LinearCounting[estimators.length + 1];
        for (int i = 0; i < estimators.length; i++) {
            // Also rejects null elements, which are not instances of anything.
            if (!(estimators[i] instanceof LinearCounting)) {
                throw new LinearCountingMergeException("Cannot merge estimators of different class");
            }
            lcs[i] = (LinearCounting) estimators[i];
        }
        lcs[lcs.length - 1] = this;
        return LinearCounting.mergeEstimators(lcs);
    }

    /**
     * Merges estimators to produce an estimator for their combined streams
     *
     * @param estimators estimators to merge; their bitmaps are OR-ed into a fresh array
     * @return merged estimator or null if no estimators were provided
     * @throws LinearCountingMergeException if estimators are not mergeable (all estimators must be the same size)
     */
    public static LinearCounting mergeEstimators(LinearCounting... estimators) throws LinearCountingMergeException {
        LinearCounting merged = null;
        if (estimators != null && estimators.length > 0) {
            int size = estimators[0].map.length;
            byte[] mergedBytes = new byte[size];

            for (LinearCounting estimator : estimators) {
                if (estimator.map.length != size) {
                    throw new LinearCountingMergeException("Cannot merge estimators of different sizes");
                }

                for (int b = 0; b < size; b++) {
                    mergedBytes[b] |= estimator.map[b];
                }
            }

            merged = new LinearCounting(mergedBytes);
        }
        return merged;
    }

    /**
     * Thrown when LinearCounting estimators cannot be merged.
     */
    @SuppressWarnings("serial")
    protected static class LinearCountingMergeException extends CardinalityMergeException {

        public LinearCountingMergeException(String message) {
            super(message);
        }
    }

    /**
     * Builder producing {@link LinearCounting} estimators with a fixed bitmap size.
     */
    public static class Builder implements IBuilder, Serializable {

        private static final long serialVersionUID = -4245416224034648428L;

        /**
         * Taken from Table II of Whang et al.
         */
        protected final static int[] onePercentErrorLength =
                {
                        5034, 5067, 5100, 5133, 5166, 5199, 5231, 5264, 5296, // 100 - 900
                        5329, 5647, 5957, 6260, 6556, 6847, 7132, 7412, 7688, // 1000 - 9000
                        7960, 10506, 12839, 15036, 17134, 19156, 21117, 23029, 24897, // 10000 - 90000
                        26729, 43710, 59264, 73999, 88175, 101932, 115359, 128514, 141441, // 100000 - 900000
                        154171, 274328, 386798, 494794, 599692, 702246, 802931, 902069, 999894, // 1000000 - 9000000
                        1096582 // 10000000
                };

        /**
         * Bitmap size in bytes of the estimators this builder creates
         */
        protected final int size;

        /**
         * Creates a builder for estimators with a 65536-byte bitmap.
         */
        public Builder() {
            this(65536);
        }

        /**
         * @param size bitmap size in bytes
         */
        public Builder(int size) {
            this.size = size;
        }

        @Override
        public LinearCounting build() {
            return new LinearCounting(size);
        }

        /**
         * @return bitmap size in bytes of the estimators this builder creates
         */
        @Override
        public int sizeof() {
            return size;
        }

        /**
         * Returns a LinearCounting.Builder that generates an LC
         * estimator which keeps estimates below 1% error on average and has
         * a low likelihood of saturation (0.7%) for any stream with
         * cardinality less than maxCardinality
         *
         * @param maxCardinality maximum expected cardinality of the stream
         * @return builder whose bitmap length is interpolated from the table of Whang et al.
         * @throws IllegalArgumentException if maxCardinality is not a positive integer
         */
        public static Builder onePercentError(int maxCardinality) {
            if (maxCardinality <= 0) {
                throw new IllegalArgumentException("maxCardinality (" + maxCardinality + ") must be a positive integer");
            }

            int length = -1;
            if (maxCardinality < 100) {
                length = onePercentErrorLength[0];
            } else if (maxCardinality < 10000000) {
                // The table holds 9 entries per decade starting at 100: locate the entry at
                // or below maxCardinality and interpolate towards the next one.
                int logscale = (int) Math.log10(maxCardinality);
                int scaleValue = (int) Math.pow(10, logscale);
                int scaleIndex = maxCardinality / scaleValue;
                int index = 9 * (logscale - 2) + (scaleIndex - 1);
                int lowerBound = scaleValue * scaleIndex;
                length = lerp(lowerBound, onePercentErrorLength[index], lowerBound + scaleValue, onePercentErrorLength[index + 1], maxCardinality);
            } else if (maxCardinality < 50000000) {
                length = lerp(10000000, 1096582, 50000000, 4584297, maxCardinality);
            } else if (maxCardinality < 100000000) {
                length = lerp(50000000, 4584297, 100000000, 8571013, maxCardinality);
            } else if (maxCardinality <= 120000000) {
                length = lerp(100000000, 8571013, 120000000, 10112529, maxCardinality);
            } else {
                length = maxCardinality / 12;
            }

            // length is in bits; round up to whole bytes.
            int sz = (int) Math.ceil(length / 8D);
            return new Builder(sz);
        }

        /**
         * Builds Linear Counter with arbitrary standard error and maximum expected cardinality.
         *
         * This method is more compute intensive than {@link #onePercentError(int)} because it
         * solves the precision inequality at runtime. Therefore, {@link #onePercentError(int)} should be
         * used whenever possible.
         *
         * @param eps            standard error as a fraction (e.g. {@code 0.01} for 1%)
         * @param maxCardinality maximum expected cardinality
         * @throws IllegalArgumentException if eps is outside (0, 1) or maxCardinality is not positive
         */
        public static Builder withError(double eps, int maxCardinality) {
            int sz = computeRequiredBitMaskLength(maxCardinality, eps);
            return new Builder((int) Math.ceil(sz / 8D));
        }

        /**
         * Runs binary search to find minimum bit mask length that holds precision inequality.
         *
         * @param n   expected cardinality
         * @param eps desired standard error
         * @return minimal required bit mask length
         */
        private static int computeRequiredBitMaskLength(double n, double eps) {
            if (eps >= 1 || eps <= 0) {
                throw new IllegalArgumentException("Epsilon should be in (0, 1) range");
            }
            if (n <= 0) {
                throw new IllegalArgumentException("Cardinality should be positive");
            }
            int fromM = 1;
            int toM = 100000000;
            int m;
            double eq;
            do {
                m = (toM + fromM) / 2;
                eq = precisionInequalityRV(n / m, eps);
                if (m > eq) {
                    toM = m;
                } else {
                    fromM = m + 1;
                }
            } while (toM > fromM);
            return m > eq ? m : m + 1;
        }

        /**
         * @param t   load factor for linear counter
         * @param eps desired standard error
         * @return right-hand side of the precision inequality that the bit mask length must exceed
         */
        private static double precisionInequalityRV(double t, double eps) {
            return max(1.0 / pow(eps * t, 2), 5) * (exp(t) - t - 1);
        }

        /**
         * Linear interpolation through (x0, y0) and (x1, y1), evaluated at x.
         *
         * @param x0 abscissa of the first point
         * @param y0 ordinate of the first point
         * @param x1 abscissa of the second point
         * @param y1 ordinate of the second point
         * @param x  abscissa to evaluate at
         * @return linear interpolation, rounded up to an int
         */
        protected static int lerp(int x0, int y0, int x1, int y1, int x) {
            return (int) Math.ceil(y0 + (x - x0) * (double) (y1 - y0) / (x1 - x0));
        }
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/cardinality/LogLog.java 0000664 0000000 0000000 00000014344 13531322035 0031132 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.cardinality;
import java.util.Arrays;
import com.clearspring.analytics.hash.MurmurHash;
import com.clearspring.analytics.util.IBuilder;
public class LogLog implements ICardinality {
/**
* Gamma function computed using Mathematica
* AccountingForm[
* N[With[{m = 2^Range[0, 31]},
* m (Gamma[-1/m]*(1 - 2^(1/m))/Log[2])^-m], 14]]
*
* Entry i is the value of that expression for m = 2^i; the constructors
* read the entry at index k into Ca.
*/
protected static final double[] mAlpha = {
0,
0.44567926005415,
1.2480639342271,
2.8391255240079,
6.0165231584809,
12.369319965552,
25.073991603111,
50.482891762408,
101.30047482584,
202.93553338100,
406.20559696699,
812.74569744189,
1625.8258850594,
3251.9862536323,
6504.3069874480,
13008.948453415,
26018.231384516,
52036.797246302,
104073.92896967,
208148.19241629,
416296.71930949,
832593.77309585,
1665187.8806686,
3330376.0958140,
6660752.5261049,
13321505.386687,
26643011.107850,
53286022.550177,
106572045.43483,
213144091.20414,
426288182.74275,
852576365.81999
};
// Base-2 logarithm of the number of registers (m == 1 << k).
protected final int k;
// Number of registers, i.e. the length of M.
protected int m;
// Constant taken from mAlpha[k].
protected double Ca;
// Register array; this very array is what getBytes() returns.
protected byte[] M;
// Running sum of all values stored in M.
protected int Rsum = 0;
/**
 * Creates an empty estimator with {@code 2^k} registers.
 *
 * @param k base-2 logarithm of the number of registers; must be non-negative and
 *          smaller than {@code mAlpha.length - 1}
 * @throws IllegalArgumentException if k is outside that range
 */
public LogLog(int k) {
    // A negative k would otherwise only surface as an ArrayIndexOutOfBoundsException
    // from the mAlpha lookup below.
    if (k < 0) {
        throw new IllegalArgumentException(String.format("k must not be negative: k=%d", k));
    }
    if (k >= (mAlpha.length - 1)) {
        throw new IllegalArgumentException(String.format("Max k (%d) exceeded: k=%d", mAlpha.length - 1, k));
    }

    this.k = k;
    this.m = 1 << k;
    this.Ca = mAlpha[k];
    this.M = new byte[m];
}
/**
 * Wraps an existing register array. The array is used directly (not copied) and the
 * running register sum is recomputed from its contents.
 *
 * @param M register array; its length must be a power of 2
 * @throws IllegalArgumentException if {@code M.length} is not a power of 2
 */
public LogLog(byte[] M) {
    // Validate with a real exception rather than an assert: assertions are normally
    // disabled, in which case a bad length would be accepted silently (or fail with an
    // ArrayIndexOutOfBoundsException for an empty array).
    if (Integer.bitCount(M.length) != 1) {
        throw new IllegalArgumentException("Invalid array size: M.length must be a power of 2");
    }

    this.M = M;
    this.m = M.length;
    this.k = Integer.numberOfTrailingZeros(m);
    this.Ca = mAlpha[k];

    for (byte b : M) {
        Rsum += b;
    }
}
/**
* Returns the register array of this estimator.
* The internal array itself is returned, not a copy.
*
* @return the array held in M
*/
@Override
public byte[] getBytes() {
return M;
}
/**
 * @return the value of {@code m}, the number of one-byte registers
 */
@Override
public int sizeof() {
    return m;
}
@Override
public long cardinality() {
/*
for(int j=0; j>> (Integer.SIZE - k);
byte r = (byte) (Integer.numberOfLeadingZeros((hashedInt << k) | (1 << (k - 1))) + 1);
if (M[j] < r) {
Rsum += r - M[j];
M[j] = r;
modified = true;
}
return modified;
}
/**
 * Hashes the object with {@link MurmurHash} and offers the resulting int hash.
 *
 * @param o element of the stream
 * @return whatever {@link #offerHashed(int)} returns for the hash of {@code o}
 */
@Override
public boolean offer(Object o) {
    return offerHashed(MurmurHash.hash(o));
}
/**
 * Computes the 1-based position, counted from the most significant end, of the first
 * set bit among the low {@code Integer.SIZE - k} bits of {@code x}.
 *
 * A sentinel bit is OR-ed in just below those bits once they are shifted to the top,
 * which bounds the result when all of them are zero.
 *
 * @param x value to inspect
 * @param k number of leading bits to discard
 * @return the position described above; {@code Integer.SIZE - k + 1} if the low
 *         {@code Integer.SIZE - k} bits are all zero
 */
protected static int rho(int x, int k) {
    final int shifted = x << k;
    final int sentinel = 1 << (k - 1);
    return 1 + Integer.numberOfLeadingZeros(shifted | sentinel);
}
/**
 * Merges this estimator with the given ones into a new estimator whose registers hold
 * the element-wise maximum. Neither this estimator nor the arguments are modified, and
 * the result never shares its register array with them.
 *
 * @param estimators zero or more estimators to merge with this one
 * @return a new estimator; an independent copy of this one if estimators is null or no arguments are passed
 * @throws LogLogMergeException if estimators are not mergeable (all estimators must be instances of LogLog of the same size)
 */
@Override
public ICardinality merge(ICardinality... estimators) throws LogLogMergeException {
    // Always work on a private copy: handing out this.M would let offers on the result
    // change this estimator's registers without updating its running register sum.
    byte[] mergedBytes = Arrays.copyOf(this.M, this.M.length);

    if (estimators != null) {
        for (ICardinality estimator : estimators) {
            if (!(this.getClass().isInstance(estimator))) {
                throw new LogLogMergeException("Cannot merge estimators of different class");
            }
            if (estimator.sizeof() != this.sizeof()) {
                throw new LogLogMergeException("Cannot merge estimators of different sizes");
            }
            LogLog ll = (LogLog) estimator;
            for (int i = 0; i < mergedBytes.length; ++i) {
                mergedBytes[i] = (byte) Math.max(mergedBytes[i], ll.M[i]);
            }
        }
    }

    return new LogLog(mergedBytes);
}
/**
 * Merges the given estimators into one estimator for their combined streams by
 * merging the first estimator with all the remaining ones.
 *
 * @param estimators estimators to merge
 * @return merged estimator or null if no estimators were provided
 * @throws LogLogMergeException if estimators are not mergeable (all estimators must be the same size)
 */
public static LogLog mergeEstimators(LogLog... estimators) throws LogLogMergeException {
    final boolean nothingToMerge = (estimators == null) || (estimators.length == 0);
    if (nothingToMerge) {
        return null;
    }

    final LogLog first = estimators[0];
    final LogLog[] rest = Arrays.copyOfRange(estimators, 1, estimators.length);
    return (LogLog) first.merge(rest);
}
/**
* Thrown by the merge operations of LogLog when the estimators cannot be merged.
*/
@SuppressWarnings("serial")
protected static class LogLogMergeException extends CardinalityMergeException {
/**
* @param message description of why the merge was rejected
*/
public LogLogMergeException(String message) {
super(message);
}
}
/**
 * Builder producing {@link LogLog} estimators for a fixed {@code k}.
 */
public static class Builder implements IBuilder {

    /**
     * Value of k used by the no-argument constructor
     */
    private static final int DEFAULT_K = 16;

    /**
     * Value handed to {@link LogLog#LogLog(int)} by {@link #build()}
     */
    protected final int k;

    /**
     * Creates a builder with k = 16.
     */
    public Builder() {
        this(DEFAULT_K);
    }

    /**
     * @param k value handed to the LogLog constructor when building
     */
    public Builder(int k) {
        this.k = k;
    }

    /**
     * @return a new, empty LogLog created with this builder's k
     */
    @Override
    public LogLog build() {
        return new LogLog(k);
    }

    /**
     * @return {@code 1 << k}
     */
    @Override
    public int sizeof() {
        final int registers = 1 << k;
        return registers;
    }
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java 0000664 0000000 0000000 00000006443 13531322035 0032210 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2012 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.cardinality;
/**
 * Packed array of 5-bit registers stored in {@code int} words.
 *
 * Despite its name, {@link #LOG2_BITS_PER_WORD} is used throughout this class as the
 * number of registers held in one word: register {@code p} lives in word
 * {@code p / LOG2_BITS_PER_WORD}, at bit offset
 * {@code REGISTER_SIZE * (p % LOG2_BITS_PER_WORD)}.
 */
public class RegisterSet {

    public final static int LOG2_BITS_PER_WORD = 6;
    public final static int REGISTER_SIZE = 5;

    /**
     * Register count this set was created for
     */
    public final int count;

    /**
     * Number of int words backing this set
     */
    public final int size;

    private final int[] M;

    /**
     * Creates a zeroed register set sized for {@code count} registers.
     */
    public RegisterSet(int count) {
        this(count, null);
    }

    /**
     * @param count         register count
     * @param initialValues backing words to adopt as-is (not copied), or null to
     *                      allocate a zeroed array of {@link #getSizeForCount(int)} words
     */
    public RegisterSet(int count, int[] initialValues) {
        this.count = count;
        this.M = (initialValues != null) ? initialValues : new int[getSizeForCount(count)];
        this.size = this.M.length;
    }

    /**
     * @return {@code count / LOG2_BITS_PER_WORD}
     */
    public static int getBits(int count) {
        return count / LOG2_BITS_PER_WORD;
    }

    /**
     * @return number of int words allocated for {@code count} registers
     */
    public static int getSizeForCount(int count) {
        final int words = getBits(count);
        if (words == 0) {
            return 1;
        }
        return (words % Integer.SIZE == 0) ? words : words + 1;
    }

    /**
     * Overwrites the register at {@code position} with {@code value}.
     */
    public void set(int position, int value) {
        final int word = position / LOG2_BITS_PER_WORD;
        final int offset = REGISTER_SIZE * (position % LOG2_BITS_PER_WORD);
        final int cleared = this.M[word] & ~(0x1f << offset);
        this.M[word] = cleared | (value << offset);
    }

    /**
     * @return the value of the register at {@code position}
     */
    public int get(int position) {
        final int word = position / LOG2_BITS_PER_WORD;
        final int offset = REGISTER_SIZE * (position % LOG2_BITS_PER_WORD);
        final int registerMask = 0x1f << offset;
        return (this.M[word] & registerMask) >>> offset;
    }

    /**
     * Stores {@code value} in the register at {@code position} if it is greater than
     * the value currently held there.
     *
     * @return true if the register was updated
     */
    public boolean updateIfGreater(int position, int value) {
        final int word = position / LOG2_BITS_PER_WORD;
        final int offset = REGISTER_SIZE * (position % LOG2_BITS_PER_WORD);
        final int registerMask = 0x1f << offset;

        // Both values are compared in their shifted (in-word) form, widened to long.
        final long current = this.M[word] & registerMask;
        final long candidate = value << offset;
        if (current >= candidate) {
            return false;
        }

        this.M[word] = (int) ((this.M[word] & ~registerMask) | candidate);
        return true;
    }

    /**
     * Replaces every register of this set with the maximum of its own value and the
     * value of the same register in {@code that}.
     */
    public void merge(RegisterSet that) {
        for (int word = 0; word < M.length; word++) {
            final int mine = this.M[word];
            final int theirs = that.M[word];

            int combined = 0;
            for (int slot = 0; slot < LOG2_BITS_PER_WORD; slot++) {
                final int registerMask = 0x1f << (REGISTER_SIZE * slot);
                combined |= Math.max(mine & registerMask, theirs & registerMask);
            }
            this.M[word] = combined;
        }
    }

    /**
     * @return the backing array itself; callers are expected not to modify it
     */
    int[] readOnlyBits() {
        return M;
    }

    /**
     * @return a copy of the backing array
     */
    public int[] bits() {
        return M.clone();
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/frequency/ 0000775 0000000 0000000 00000000000 13531322035 0026574 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/frequency/ConservativeAddSketch.java0000664 0000000 0000000 00000005707 13531322035 0033673 0 ustar 00root root 0000000 0000000 /*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.frequency;
import com.clearspring.analytics.stream.membership.Filter;
/**
* A more accurate (by some large, but ill-defined amount), but slower (by some
* small, but equally ill-defined amount) count min sketch. It seemed like a
* simple optimization and later internet searching suggested it might be
* called something like a conservative adding variant.
*/
public class ConservativeAddSketch extends CountMinSketch {

    ConservativeAddSketch() {
        super();
    }

    public ConservativeAddSketch(int depth, int width, int seed) {
        super(depth, width, seed);
    }

    public ConservativeAddSketch(double epsOfTotalCount, double confidence, int seed) {
        super(epsOfTotalCount, confidence, seed);
    }

    ConservativeAddSketch(int depth, int width, long size, long[] hashA, long[][] table) {
        super(depth, width, size, hashA, table);
    }

    /**
     * Adds {@code count} occurrences of {@code item}, raising each of the item's
     * counters only as far as (current minimum + count).
     *
     * @throws IllegalArgumentException if count is negative
     */
    @Override
    public void add(long item, long count) {
        if (count < 0) {
            // Negative values are not implemented in the regular version, and do not
            // play nicely with this algorithm anyway
            throw new IllegalArgumentException("Negative increments not implemented");
        }

        final int[] columns = new int[depth];
        for (int row = 0; row < depth; ++row) {
            columns[row] = hash(item, row);
        }
        conservativeAdd(columns, count);
    }

    /**
     * Adds {@code count} occurrences of {@code item}, raising each of the item's
     * counters only as far as (current minimum + count).
     *
     * @throws IllegalArgumentException if count is negative
     */
    @Override
    public void add(String item, long count) {
        if (count < 0) {
            // Negative values are not implemented in the regular version, and do not
            // play nicely with this algorithm anyway
            throw new IllegalArgumentException("Negative increments not implemented");
        }

        conservativeAdd(Filter.getHashBuckets(item, depth, width), count);
    }

    /**
     * Applies the conservative update to the counters selected by {@code columns}
     * (one column per row) and adds {@code count} to the total size.
     */
    private void conservativeAdd(int[] columns, long count) {
        long smallest = table[0][columns[0]];
        for (int row = 1; row < depth; ++row) {
            smallest = Math.min(smallest, table[row][columns[row]]);
        }

        final long target = smallest + count;
        for (int row = 0; row < depth; ++row) {
            final int column = columns[row];
            table[row][column] = Math.max(table[row][column], target);
        }
        size += count;
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/frequency/CountMinSketch.java 0000664 0000000 0000000 00000027233 13531322035 0032344 0 ustar 00root root 0000000 0000000 /*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.frequency;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Random;
import com.clearspring.analytics.stream.membership.Filter;
import com.clearspring.analytics.util.Preconditions;
/**
* Count-Min Sketch datastructure.
* An Improved Data Stream Summary: The Count-Min Sketch and its Applications
* https://web.archive.org/web/20060907232042/http://www.eecs.harvard.edu/~michaelm/CS222/countmin.pdf
*/
public class CountMinSketch implements IFrequency, Serializable {

    public static final long PRIME_MODULUS = (1L << 31) - 1;
    private static final long serialVersionUID = -5084982213094657923L;

    /** Number of rows (hash functions) of the table. */
    int depth;
    /** Number of counters per row. */
    int width;
    /** Counter table, {@code depth} rows by {@code width} columns. */
    long[][] table;
    /** Multiplier of the linear hash function of each row. */
    long[] hashA;
    /** Sum of all counts added so far. */
    long size;
    double eps;
    double confidence;

    CountMinSketch() {
    }

    /**
     * Creates an empty sketch of the given dimensions; the relative error is derived as
     * {@code 2 / width} and the confidence as {@code 1 - 1 / 2^depth}.
     *
     * @param depth number of rows
     * @param width number of counters per row
     * @param seed  seed of the random generator that picks the per-row hash multipliers
     */
    public CountMinSketch(int depth, int width, int seed) {
        this.depth = depth;
        this.width = width;
        this.eps = 2.0 / width;
        this.confidence = 1 - 1 / Math.pow(2, depth);
        initTablesWith(depth, width, seed);
    }

    /**
     * Creates an empty sketch dimensioned for the requested error and confidence.
     *
     * @param epsOfTotalCount relative error, as a fraction of the total count
     * @param confidence      probability with which the error bound holds
     * @param seed            seed of the random generator that picks the per-row hash multipliers
     */
    public CountMinSketch(double epsOfTotalCount, double confidence, int seed) {
        // 2/w = eps ; w = 2/eps
        // 1/2^depth <= 1-confidence ; depth >= -log2 (1-confidence)
        this.eps = epsOfTotalCount;
        this.confidence = confidence;
        this.width = (int) Math.ceil(2 / epsOfTotalCount);
        this.depth = (int) Math.ceil(-Math.log(1 - confidence) / Math.log(2));
        initTablesWith(depth, width, seed);
    }

    /**
     * Creates a sketch around existing state. The arrays are adopted as-is, not copied.
     */
    CountMinSketch(int depth, int width, long size, long[] hashA, long[][] table) {
        this.depth = depth;
        this.width = width;
        this.eps = 2.0 / width;
        this.confidence = 1 - 1 / Math.pow(2, depth);
        this.hashA = hashA;
        this.table = table;

        Preconditions.checkState(size >= 0, "The size cannot be smaller than ZER0: " + size);
        this.size = size;
    }

    @Override
    public String toString() {
        return "CountMinSketch{" +
                "eps=" + eps +
                ", confidence=" + confidence +
                ", depth=" + depth +
                ", width=" + width +
                ", size=" + size +
                '}';
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }

        final CountMinSketch that = (CountMinSketch) o;

        if (depth != that.depth) {
            return false;
        }
        if (width != that.width) {
            return false;
        }

        if (Double.compare(that.eps, eps) != 0) {
            return false;
        }
        if (Double.compare(that.confidence, confidence) != 0) {
            return false;
        }

        if (size != that.size) {
            return false;
        }

        if (!Arrays.deepEquals(table, that.table)) {
            return false;
        }
        return Arrays.equals(hashA, that.hashA);
    }

    @Override
    public int hashCode() {
        int result;
        long temp;
        result = depth;
        result = 31 * result + width;
        result = 31 * result + Arrays.deepHashCode(table);
        result = 31 * result + Arrays.hashCode(hashA);
        result = 31 * result + (int) (size ^ (size >>> 32));
        temp = Double.doubleToLongBits(eps);
        result = 31 * result + (int) (temp ^ (temp >>> 32));
        temp = Double.doubleToLongBits(confidence);
        result = 31 * result + (int) (temp ^ (temp >>> 32));
        return result;
    }

    /**
     * Allocates a zeroed counter table and draws one hash multiplier per row.
     */
    private void initTablesWith(int depth, int width, int seed) {
        this.table = new long[depth][width];
        this.hashA = new long[depth];
        Random r = new Random(seed);
        // We're using a linear hash functions
        // of the form (a*x+b) mod p.
        // a,b are chosen independently for each hash function.
        // However we can set b = 0 as all it does is shift the results
        // without compromising their uniformity or independence with
        // the other hashes.
        for (int i = 0; i < depth; ++i) {
            hashA[i] = r.nextInt(Integer.MAX_VALUE);
        }
    }

    public double getRelativeError() {
        return eps;
    }

    public double getConfidence() {
        return confidence;
    }

    /**
     * Maps {@code item} to a column of row {@code i}.
     *
     * @return column index in {@code [0, width)}
     */
    int hash(long item, int i) {
        long hash = hashA[i] * item;
        // A super fast way of computing x mod 2^p-1
        // See http://www.cs.princeton.edu/courses/archive/fall09/cos521/Handouts/universalclasses.pdf
        // page 149, right after Proposition 7.
        hash += hash >> 32;
        hash &= PRIME_MODULUS;
        // Doing "%" after (int) conversion is ~2x faster than %'ing longs.
        return ((int) hash) % width;
    }

    /**
     * Builds the exception reporting that the total size overflowed during {@code operation}.
     */
    private static IllegalStateException sizeOverflow(long previousSize, String operation, long newSize) {
        return new IllegalStateException("Overflow error: the size after calling `" + operation +
                "` is smaller than the previous size. " +
                "Previous size: " + previousSize +
                ", New size: " + newSize);
    }

    /**
     * Adds {@code count} occurrences of {@code item}.
     *
     * @throws IllegalArgumentException if count is negative
     * @throws IllegalStateException    if the total size would overflow; the sketch is left unchanged
     */
    @Override
    public void add(long item, long count) {
        if (count < 0) {
            // Actually for negative increments we'll need to use the median
            // instead of minimum, and accuracy will suffer somewhat.
            // Probably makes sense to add an "allow negative increments"
            // parameter to constructor.
            throw new IllegalArgumentException("Negative increments not implemented");
        }

        // Detect overflow before touching any counter so that a rejected add does not
        // leave the sketch half-updated; the description is only built on failure.
        final long newSize = size + count;
        if (newSize < size) {
            throw sizeOverflow(size, "add(" + item + "," + count + ")", newSize);
        }

        for (int i = 0; i < depth; ++i) {
            table[i][hash(item, i)] += count;
        }
        size = newSize;
    }

    /**
     * Adds {@code count} occurrences of {@code item}.
     *
     * @throws IllegalArgumentException if count is negative
     * @throws IllegalStateException    if the total size would overflow; the sketch is left unchanged
     */
    @Override
    public void add(String item, long count) {
        if (count < 0) {
            // Actually for negative increments we'll need to use the median
            // instead of minimum, and accuracy will suffer somewhat.
            // Probably makes sense to add an "allow negative increments"
            // parameter to constructor.
            throw new IllegalArgumentException("Negative increments not implemented");
        }

        int[] buckets = Filter.getHashBuckets(item, depth, width);

        // Same ordering as add(long, long): validate first, mutate afterwards.
        final long newSize = size + count;
        if (newSize < size) {
            throw sizeOverflow(size, "add(" + item + "," + count + ")", newSize);
        }

        for (int i = 0; i < depth; ++i) {
            table[i][buckets[i]] += count;
        }
        size = newSize;
    }

    /**
     * @return sum of all counts added to this sketch
     */
    @Override
    public long size() {
        return size;
    }

    /**
     * The estimate is correct within 'epsilon' * (total item count),
     * with probability 'confidence'.
     */
    @Override
    public long estimateCount(long item) {
        long res = Long.MAX_VALUE;
        for (int i = 0; i < depth; ++i) {
            res = Math.min(res, table[i][hash(item, i)]);
        }
        return res;
    }

    /**
     * String counterpart of {@link #estimateCount(long)}: the minimum over the item's counters.
     */
    @Override
    public long estimateCount(String item) {
        long res = Long.MAX_VALUE;
        int[] buckets = Filter.getHashBuckets(item, depth, width);
        for (int i = 0; i < depth; ++i) {
            res = Math.min(res, table[i][buckets[i]]);
        }
        return res;
    }

    /**
     * Merges count min sketches to produce a count min sketch for their combined streams
     *
     * @param estimators sketches to merge; none of them is modified
     * @return merged estimator or null if no estimators were provided
     * @throws CMSMergeException if estimators are not mergeable (same depth, width and seed)
     * @throws IllegalStateException if the combined size overflows
     */
    public static CountMinSketch merge(CountMinSketch... estimators) throws CMSMergeException {
        CountMinSketch merged = null;
        if (estimators != null && estimators.length > 0) {
            int depth = estimators[0].depth;
            int width = estimators[0].width;
            long[] hashA = Arrays.copyOf(estimators[0].hashA, estimators[0].hashA.length);

            long[][] table = new long[depth][width];
            long size = 0;

            for (CountMinSketch estimator : estimators) {
                if (estimator.depth != depth) {
                    throw new CMSMergeException("Cannot merge estimators of different depth");
                }
                if (estimator.width != width) {
                    throw new CMSMergeException("Cannot merge estimators of different width");
                }
                if (!Arrays.equals(estimator.hashA, hashA)) {
                    throw new CMSMergeException("Cannot merge estimators of different seed");
                }

                for (int i = 0; i < table.length; i++) {
                    for (int j = 0; j < table[i].length; j++) {
                        table[i][j] += estimator.table[i][j];
                    }
                }

                // The description (which calls toString) is only built if the sum overflowed.
                final long newSize = size + estimator.size;
                if (newSize < size) {
                    throw sizeOverflow(size, "merge(" + estimator + ")", newSize);
                }
                size = newSize;
            }

            merged = new CountMinSketch(depth, width, size, hashA, table);
        }

        return merged;
    }

    /**
     * Serializes a sketch as: size (long), depth (int), width (int), then for each row
     * its hash multiplier (long) followed by its {@code width} counters (long each).
     */
    public static byte[] serialize(CountMinSketch sketch) {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream s = new DataOutputStream(bos);
        try {
            s.writeLong(sketch.size);
            s.writeInt(sketch.depth);
            s.writeInt(sketch.width);
            for (int i = 0; i < sketch.depth; ++i) {
                s.writeLong(sketch.hashA[i]);
                for (int j = 0; j < sketch.width; ++j) {
                    s.writeLong(sketch.table[i][j]);
                }
            }
            s.close();
            return bos.toByteArray();
        } catch (IOException e) {
            // Shouldn't happen
            throw new RuntimeException(e);
        }
    }

    /**
     * Rebuilds a sketch from the format written by {@link #serialize(CountMinSketch)}.
     * The error and confidence are re-derived from width and depth.
     */
    public static CountMinSketch deserialize(byte[] data) {
        ByteArrayInputStream bis = new ByteArrayInputStream(data);
        DataInputStream s = new DataInputStream(bis);
        try {
            CountMinSketch sketch = new CountMinSketch();
            sketch.size = s.readLong();
            sketch.depth = s.readInt();
            sketch.width = s.readInt();
            sketch.eps = 2.0 / sketch.width;
            sketch.confidence = 1 - 1 / Math.pow(2, sketch.depth);
            sketch.hashA = new long[sketch.depth];
            sketch.table = new long[sketch.depth][sketch.width];
            for (int i = 0; i < sketch.depth; ++i) {
                sketch.hashA[i] = s.readLong();
                for (int j = 0; j < sketch.width; ++j) {
                    sketch.table[i][j] = s.readLong();
                }
            }
            return sketch;
        } catch (IOException e) {
            // Shouldn't happen
            throw new RuntimeException(e);
        }
    }

    /**
     * Thrown when count min sketches cannot be merged.
     */
    @SuppressWarnings("serial")
    protected static class CMSMergeException extends FrequencyMergeException {

        public CMSMergeException(String message) {
            super(message);
        }
    }
}
FrequencyMergeException.java 0000664 0000000 0000000 00000001521 13531322035 0034157 0 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/frequency /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.frequency;
/**
* Base class of the checked exceptions raised when frequency estimators
* cannot be merged; CountMinSketch.CMSMergeException extends it.
*/
@SuppressWarnings("serial")
public abstract class FrequencyMergeException extends Exception {
/**
* @param message description of why the merge was rejected
*/
public FrequencyMergeException(String message) {
super(message);
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/frequency/IFrequency.java 0000664 0000000 0000000 00000000377 13531322035 0031520 0 ustar 00root root 0000000 0000000 package com.clearspring.analytics.stream.frequency;
/**
* Frequency estimator over items identified either by a long or by a String.
* CountMinSketch in this package is an implementation.
*/
public interface IFrequency {
/**
* Records count occurrences of the item.
*
* @param item item identified by a long
* @param count number of occurrences to record
*/
void add(long item, long count);
/**
* Records count occurrences of the item.
*
* @param item item identified by a String
* @param count number of occurrences to record
*/
void add(String item, long count);
/**
* @param item item identified by a long
* @return estimated number of recorded occurrences of the item
*/
long estimateCount(long item);
/**
* @param item item identified by a String
* @return estimated number of recorded occurrences of the item
*/
long estimateCount(String item);
/**
* @return the size of the estimator; in CountMinSketch this is the sum of all counts added
*/
long size();
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/membership/ 0000775 0000000 0000000 00000000000 13531322035 0026726 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/membership/BitSetSerializer.java 0000664 0000000 0000000 00000003053 13531322035 0033016 0 ustar 00root root 0000000 0000000 /*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.clearspring.analytics.stream.membership;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.BitSet;
/**
 * Reads and writes a {@link BitSet} using Java object serialization layered on top of
 * the caller's data streams. Neither method closes the stream it is given.
 */
public class BitSetSerializer {

    /**
     * Writes {@code bs} to {@code dos} as a serialized object and flushes.
     *
     * @param bs  bit set to write
     * @param dos destination stream; left open
     * @throws IOException if writing fails
     */
    public static void serialize(BitSet bs, DataOutputStream dos) throws IOException {
        final ObjectOutputStream objectOut = new ObjectOutputStream(dos);
        objectOut.writeObject(bs);
        objectOut.flush();
    }

    /**
     * Reads one serialized object from {@code dis} and returns it as a {@link BitSet}.
     *
     * NOTE(review): this is native Java deserialization; it should only be fed data
     * from a trusted source.
     *
     * @param dis source stream; left open
     * @return the bit set that was read
     * @throws IOException      if reading fails
     * @throws RuntimeException wrapping a ClassNotFoundException raised while reading
     */
    public static BitSet deserialize(DataInputStream dis) throws IOException {
        final ObjectInputStream objectIn = new ObjectInputStream(dis);
        final Object decoded;
        try {
            decoded = objectIn.readObject();
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        return (BitSet) decoded;
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/membership/BloomCalculations.java 0000664 0000000 0000000 00000016113 13531322035 0033205 0 ustar 00root root 0000000 0000000 /**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.membership;
/**
 * The following calculations are taken from:
 * http://www.cs.wisc.edu/~cao/papers/summary-cache/node8.html
 * "Bloom Filters - the math"
 *
 * This class's static methods are meant to facilitate the use of the Bloom
 * Filter class by helping to choose correct values of 'bits per element' and
 * 'number of hash functions, k'.
 */
public class BloomCalculations {

    private static final int maxBuckets = 15;
    private static final int minBuckets = 2;
    private static final int minK = 1;
    private static final int maxK = 8;

    /** Optimal number of hash functions, indexed by buckets per element. */
    private static final int[] optKPerBuckets =
            new int[]{1, // dummy K for 0 buckets per element
                      1, // dummy K for 1 buckets per element
                      1, 2, 3, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14};

    /**
     * In the following table, the row 'i' shows false positive rates if i buckets
     * per element are used.  Column 'j' shows false positive rates if j hash
     * functions are used.  The first row is 'i=0', the first column is 'j=0'.
     * Each cell (i,j) the false positive rate determined by using i buckets per
     * element and j hash functions.
     */
    static final double[][] probs = new double[][]{
            {1.0}, // dummy row representing 0 buckets per element
            {1.0, 1.0}, // dummy row representing 1 buckets per element
            {1.0, 0.393, 0.400},
            {1.0, 0.283, 0.237, 0.253},
            {1.0, 0.221, 0.155, 0.147, 0.160},
            {1.0, 0.181, 0.109, 0.092, 0.092, 0.101}, // 5
            {1.0, 0.154, 0.0804, 0.0609, 0.0561, 0.0578, 0.0638},
            {1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364},
            {1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 0.0229},
            {1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145},
            {1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846}, // 10
            {1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509},
            {1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314},
            {1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194},
            {1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012},
            {1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744}, // 15
            {1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459},
            {1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302, 0.000287, 0.000284},
            {1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198, 0.000183, 0.000176},
            {1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132, 0.000118, 0.000111, 0.000109},
            {1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05, 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05} // 20
    };  // the first column is a dummy column representing K=0.

    /**
     * Given the number of buckets that can be used per element, return the optimal
     * number of hash functions in order to minimize the false positive rate.
     * Values beyond the end of the lookup table are clamped to its last entry.
     *
     * @param bucketsPerElement number of buckets available per element; must be non-negative
     * @return The number of hash functions that minimize the false positive rate.
     */
    public static int computeBestK(int bucketsPerElement) {
        assert bucketsPerElement >= 0;
        if (bucketsPerElement >= optKPerBuckets.length) {
            return optKPerBuckets[optKPerBuckets.length - 1];
        }
        return optKPerBuckets[bucketsPerElement];
    }

    /**
     * A wrapper class that holds two key parameters for a Bloom Filter: the
     * number of hash functions used, and the number of buckets per element used.
     */
    public static final class BloomSpecification {

        final int K; // number of hash functions.
        final int bucketsPerElement;

        /**
         * @param k                 number of hash functions
         * @param bucketsPerElement number of buckets per element
         */
        public BloomSpecification(int k, int bucketsPerElement) {
            K = k;
            this.bucketsPerElement = bucketsPerElement;
        }
    }

    /**
     * Given a maximum tolerable false positive probability, compute a Bloom
     * specification which will give less than the specified false positive rate,
     * but minimize the number of buckets per element and the number of hash
     * functions used.  Because bandwidth (and therefore total bitvector size)
     * is considered more expensive than computing power, preference is given
     * to minimizing buckets per element rather than number of hash functions.
     *
     * Requests looser than the smallest table entry get the minimum
     * specification; requests tighter than the table's capped entry get the
     * maximum specification, whose rate may exceed the requested one.
     *
     * @param maxFalsePosProb The maximum tolerable false positive rate.
     * @return A Bloom Specification which would result in a false positive rate
     *         less than specified by the function call.
     */
    public static BloomSpecification computeBucketsAndK(double maxFalsePosProb) {
        // Handle the trivial cases
        if (maxFalsePosProb >= probs[minBuckets][minK]) {
            // Constructor order is (k, bucketsPerElement): the smallest useful
            // filter is minBuckets buckets per element with its optimal K.
            return new BloomSpecification(optKPerBuckets[minBuckets], minBuckets);
        }
        if (maxFalsePosProb < probs[maxBuckets][maxK]) {
            return new BloomSpecification(maxK, maxBuckets);
        }

        // First find the minimal required number of buckets:
        int bucketsPerElement = 2;
        int K = optKPerBuckets[2];
        while (probs[bucketsPerElement][K] > maxFalsePosProb) {
            bucketsPerElement++;
            K = optKPerBuckets[bucketsPerElement];
        }
        // Now that the number of buckets is sufficient, see if we can relax K
        // without losing too much precision.  Column 0 is 1.0, so K never drops below 1.
        while (probs[bucketsPerElement][K - 1] <= maxFalsePosProb) {
            K--;
        }

        return new BloomSpecification(K, bucketsPerElement);
    }

    /**
     * Calculate the theoretical probability of a false positive for a filter
     * with the given geometry.
     *
     * @param bucketsPerElement number of buckets (bits) per inserted element, i.e. m / n
     * @param hashCount         number of hash functions, k
     * @return probability of a false positive.
     */
    public static double getFalsePositiveProbability(int bucketsPerElement, int hashCount) {
        // (1 - e^(-k * n / m)) ^ k
        return Math.pow(1 - Math.exp(-hashCount * (1 / (double) bucketsPerElement)), hashCount);
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java 0000664 0000000 0000000 00000012666 13531322035 0032022 0 ustar 00root root 0000000 0000000 /**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.membership;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.BitSet;
public class BloomFilter extends Filter {
static ICompactSerializer serializer_ = new BloomFilterSerializer();
public static ICompactSerializer serializer() {
return serializer_;
}
private BitSet filter_;
public BloomFilter(int numElements, int bucketsPerElement) {
this(BloomCalculations.computeBestK(bucketsPerElement), new BitSet(numElements * bucketsPerElement + 20));
}
public BloomFilter(int numElements, double maxFalsePosProbability) {
BloomCalculations.BloomSpecification spec = BloomCalculations
.computeBucketsAndK(maxFalsePosProbability);
filter_ = new BitSet(numElements * spec.bucketsPerElement + 20);
hashCount = spec.K;
}
/*
* This version is only used by the deserializer.
*/
BloomFilter(int hashes, BitSet filter) {
hashCount = hashes;
filter_ = filter;
}
public void clear() {
filter_.clear();
}
public int buckets() {
return filter_.size();
}
BitSet filter() {
return filter_;
}
public boolean isPresent(String key) {
for (int bucketIndex : getHashBuckets(key)) {
if (!filter_.get(bucketIndex)) {
return false;
}
}
return true;
}
public boolean isPresent(byte[] key) {
for (int bucketIndex : getHashBuckets(key)) {
if (!filter_.get(bucketIndex)) {
return false;
}
}
return true;
}
/*
@param key -- value whose hash is used to fill
the filter_.
This is a general purpose API.
*/
public void add(String key) {
for (int bucketIndex : getHashBuckets(key)) {
filter_.set(bucketIndex);
}
}
public void add(byte[] key) {
for (int bucketIndex : getHashBuckets(key)) {
filter_.set(bucketIndex);
}
}
public String toString() {
return filter_.toString();
}
ICompactSerializer tserializer() {
return serializer_;
}
int emptyBuckets() {
int n = 0;
for (int i = 0; i < buckets(); i++) {
if (!filter_.get(i)) {
n++;
}
}
return n;
}
public void addAll(BloomFilter other) {
if (this.getHashCount() != other.getHashCount()) {
throw new IllegalArgumentException("Cannot merge filters of different sizes");
}
this.filter().or(other.filter());
}
public Filter merge(Filter... filters) {
BloomFilter merged = new BloomFilter(this.getHashCount(), (BitSet) this.filter().clone());
if (filters == null) {
return merged;
}
for (Filter filter : filters) {
if (!(filter instanceof BloomFilter)) {
throw new IllegalArgumentException("Cannot merge filters of different class");
}
BloomFilter bf = (BloomFilter) filter;
merged.addAll(bf);
}
return merged;
}
/**
* @return a BloomFilter that always returns a positive match, for testing
*/
public static BloomFilter alwaysMatchingBloomFilter() {
BitSet set = new BitSet(64);
set.set(0, 64);
return new BloomFilter(1, set);
}
public static byte[] serialize(BloomFilter filter) {
DataOutputBuffer out = new DataOutputBuffer();
try {
BloomFilter.serializer().serialize(filter, out);
out.close();
} catch (IOException e) {
e.printStackTrace();
}
return out.getData();
}
public static BloomFilter deserialize(byte[] bytes) {
BloomFilter filter = null;
DataInputBuffer in = new DataInputBuffer();
in.reset(bytes, bytes.length);
try {
filter = BloomFilter.serializer().deserialize(in);
in.close();
} catch (IOException e) {
e.printStackTrace();
}
return filter;
}
}
class BloomFilterSerializer implements ICompactSerializer {
public void serialize(BloomFilter bf, DataOutputStream dos)
throws IOException {
dos.writeInt(bf.getHashCount());
BitSetSerializer.serialize(bf.filter(), dos);
}
public BloomFilter deserialize(DataInputStream dis) throws IOException {
int hashes = dis.readInt();
BitSet bs = BitSetSerializer.deserialize(dis);
return new BloomFilter(hashes, bs);
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/membership/DataInputBuffer.java 0000664 0000000 0000000 00000004653 13531322035 0032624 0 ustar 00root root 0000000 0000000 /**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.membership;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
/**
 * A {@link DataInputStream} that reads from a byte array which can be swapped
 * out via {@code reset}. This class performs no synchronization of its own.
 */
public final class DataInputBuffer extends DataInputStream {

    /** Byte-array stream whose backing array and bounds can be replaced. */
    private static class ResettableBytes extends ByteArrayInputStream {

        public ResettableBytes() {
            super(new byte[0]);
        }

        /** Points the stream at {@code input}, reading from {@code start} up to {@code start + length}. */
        public void reset(byte[] input, int start, int length) {
            buf = input;
            pos = start;
            mark = start;
            count = start + length;
        }

        public int getPosition() {
            return pos;
        }

        public void setPosition(int position) {
            pos = position;
        }

        public int getLength() {
            return count;
        }
    }

    private ResettableBytes buffer_;

    /**
     * Constructs a new buffer backed by an empty array.
     */
    public DataInputBuffer() {
        this(new ResettableBytes());
    }

    private DataInputBuffer(ResettableBytes bytes) {
        super(bytes);
        buffer_ = bytes;
    }

    /**
     * Replaces the data being read with the first {@code length} bytes of {@code input}.
     */
    public void reset(byte[] input, int length) {
        reset(input, 0, length);
    }

    /**
     * Replaces the data being read with {@code length} bytes of {@code input},
     * beginning at offset {@code start}.
     */
    public void reset(byte[] input, int start, int length) {
        buffer_.reset(input, start, length);
    }

    /**
     * Returns the end index of the readable data, i.e. {@code start + length}
     * of the most recent reset.
     */
    public int getLength() {
        return buffer_.getLength();
    }

    /** Returns the index of the next byte to be read from the backing array. */
    public int getPosition() {
        return buffer_.getPosition();
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/membership/DataOutputBuffer.java 0000664 0000000 0000000 00000005504 13531322035 0033021 0 ustar 00root root 0000000 0000000 /**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.membership;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;
/**
 * A {@link DataOutputStream} that collects everything written to it in an
 * in-memory, growable byte array. The methods added here are not synchronized.
 */
public class DataOutputBuffer extends DataOutputStream {

    /** Byte-array sink that exposes its contents and can ingest from a {@link DataInput}. */
    private static class GrowableBytes extends ByteArrayOutputStream {

        /** @return a copy of the bytes written so far, trimmed to {@link #getLength()} */
        public byte[] getData() {
            return Arrays.copyOf(buf, getLength());
        }

        public int getLength() {
            return count;
        }

        /** Forgets the contents; the backing array is kept for reuse. */
        @Override
        public void reset() {
            count = 0;
        }

        /** Reads exactly {@code len} bytes from {@code in} onto the end of the data, growing if needed. */
        public void write(DataInput in, int len) throws IOException {
            final int required = count + len;
            if (buf.length < required) {
                // grow to at least double the current capacity
                final byte[] grown = new byte[Math.max(buf.length << 1, required)];
                System.arraycopy(buf, 0, grown, 0, count);
                buf = grown;
            }
            in.readFully(buf, count, len);
            count = required;
        }
    }

    private GrowableBytes buffer;

    /**
     * Constructs a new empty buffer.
     */
    public DataOutputBuffer() {
        this(new GrowableBytes());
    }

    private DataOutputBuffer(GrowableBytes sink) {
        super(sink);
        buffer = sink;
    }

    /**
     * Returns a copy of the current contents of the buffer; the returned array
     * has exactly {@link #getLength()} bytes.
     */
    public byte[] getData() {
        return buffer.getData();
    }

    /**
     * Returns the length of the valid data currently in the buffer.
     */
    public int getLength() {
        return buffer.getLength();
    }

    /**
     * Resets the buffer to empty and zeroes the written-byte counter.
     *
     * @return this buffer
     */
    public DataOutputBuffer reset() {
        written = 0;
        buffer.reset();
        return this;
    }

    /**
     * Writes {@code length} bytes from a DataInput directly into the buffer.
     */
    public void write(DataInput in, int length) throws IOException {
        buffer.write(in, length);
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/membership/Filter.java 0000664 0000000 0000000 00000005476 13531322035 0031032 0 ustar 00root root 0000000 0000000 /*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.clearspring.analytics.stream.membership;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Method;
import com.clearspring.analytics.hash.MurmurHash;
/**
 * Base class for filters that map each key to {@link #getHashCount()} bucket
 * indices in the range {@code [0, buckets())}.
 */
public abstract class Filter {

    int hashCount;

    /** @return the number of bucket indices computed per key */
    public int getHashCount() {
        return hashCount;
    }

    /** @return the bucket indices for {@code key} under this filter's hash count and bucket count */
    public int[] getHashBuckets(String key) {
        return Filter.getHashBuckets(key, hashCount, buckets());
    }

    /** @return the bucket indices for {@code key} under this filter's hash count and bucket count */
    public int[] getHashBuckets(byte[] key) {
        return Filter.getHashBuckets(key, hashCount, buckets());
    }

    /** @return the number of buckets; used as the modulus for bucket indices */
    abstract int buckets();

    public abstract void add(String key);

    public abstract boolean isPresent(String key);

    // for testing
    abstract int emptyBuckets();

    /**
     * Looks up the public static {@code serializer()} method of the concrete
     * class by reflection and returns its result.
     *
     * @throws RuntimeException wrapping any failure to find or invoke that method
     */
    @SuppressWarnings("unchecked")
    ICompactSerializer<Filter> getSerializer() {
        try {
            Method method = getClass().getMethod("serializer");
            // static method, hence the null receiver
            return (ICompactSerializer<Filter>) method.invoke(null);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    // Murmur is faster than an SHA-based approach and provides as-good collision
    // resistance.  The combinatorial generation approach described in
    // https://gnunet.org/sites/default/files/LessHashing2006Kirsch.pdf
    // does prove to work in actual tests, and is obviously faster
    // than performing further iterations of murmur.

    /**
     * Computes bucket indices for a string key from its UTF-16 encoded bytes.
     *
     * @param key       key to hash
     * @param hashCount number of indices to produce
     * @param max       number of buckets; every index is in {@code [0, max)}
     */
    public static int[] getHashBuckets(String key, int hashCount, int max) {
        byte[] b;
        try {
            b = key.getBytes("UTF-16");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        return getHashBuckets(b, hashCount, max);
    }

    /**
     * Computes {@code hashCount} bucket indices as {@code |(hash1 + i * hash2) % max|},
     * where {@code hash1} is the murmur hash with seed 0 and {@code hash2} the
     * murmur hash seeded with {@code hash1}.
     */
    static int[] getHashBuckets(byte[] b, int hashCount, int max) {
        int[] result = new int[hashCount];
        int hash1 = MurmurHash.hash(b, b.length, 0);
        int hash2 = MurmurHash.hash(b, b.length, hash1);
        for (int i = 0; i < hashCount; i++) {
            result[i] = Math.abs((hash1 + i * hash2) % max);
        }
        return result;
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/membership/ICompactSerializer.java 0000664 0000000 0000000 00000003257 13531322035 0033331 0 ustar 00root root 0000000 0000000 /**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.membership;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
/**
 * Allows for the controlled serialization/deserialization of a given type.
 *
 * @param <T> the type this serializer reads and writes
 */
public interface ICompactSerializer<T> {

    /**
     * Serialize the specified type into the specified DataOutputStream instance.
     *
     * @param t   type that needs to be serialized
     * @param dos DataOutput into which serialization needs to happen.
     * @throws IOException if writing to the stream fails
     */
    void serialize(T t, DataOutputStream dos) throws IOException;

    /**
     * Deserialize from the specified DataInputStream instance.
     *
     * @param dis DataInput from which deserialization needs to happen.
     * @return the type that was deserialized
     * @throws IOException if reading from the stream fails
     */
    T deserialize(DataInputStream dis) throws IOException;
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/quantile/ 0000775 0000000 0000000 00000000000 13531322035 0026415 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/quantile/GroupTree.java 0000664 0000000 0000000 00000030000 13531322035 0031165 0 ustar 00root root 0000000 0000000 /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.quantile;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
import com.clearspring.analytics.util.AbstractIterator;
import com.clearspring.analytics.util.Preconditions;
/**
 * A tree containing TDigest.Group.  This adds to the normal NavigableSet the
 * ability to sum up the size of elements to the left of a particular group.
 *
 * Each node is either empty (size 0), a leaf holding one group (size 1, no
 * children), or an interior node with two children whose {@code leaf} field
 * is the first group of the right subtree and acts as the split key.
 */
public class GroupTree implements Iterable<TDigest.Group> {

    // sum of count() over all groups in this subtree
    private int count;
    // number of groups in this subtree
    private int size;
    private int depth;
    // the group itself for a leaf node; the split key for an interior node
    private TDigest.Group leaf;
    private GroupTree left, right;

    /** Creates an empty tree. */
    public GroupTree() {
        count = size = depth = 0;
        leaf = null;
        left = right = null;
    }

    /** Creates a single-element tree holding {@code leaf}. */
    public GroupTree(TDigest.Group leaf) {
        size = depth = 1;
        this.leaf = leaf;
        count = leaf.count();
        left = right = null;
    }

    /** Creates an interior node over two existing subtrees and rebalances it. */
    public GroupTree(GroupTree left, GroupTree right) {
        this.left = left;
        this.right = right;
        count = left.count + right.count;
        size = left.size + right.size;
        rebalance();
        // rebalance() may replace the children, so read the split key afterwards
        leaf = this.right.first();
    }

    /**
     * Inserts a group into the tree.
     *
     * NOTE(review): if {@code group} compares equal to the only element of a
     * single-element tree, neither child is created and the depth update below
     * dereferences a null child -- callers presumably never add equal groups; confirm.
     */
    public void add(TDigest.Group group) {
        if (size == 0) {
            leaf = group;
            depth = 1;
            count = group.count();
            size = 1;
            return;
        } else if (size == 1) {
            // split the leaf into an interior node with two leaf children
            int order = group.compareTo(leaf);
            if (order < 0) {
                left = new GroupTree(group);
                right = new GroupTree(leaf);
            } else if (order > 0) {
                left = new GroupTree(leaf);
                right = new GroupTree(group);
                leaf = group;
            }
        } else if (group.compareTo(leaf) < 0) {
            left.add(group);
        } else {
            right.add(group);
        }
        count += group.count();
        size++;
        depth = Math.max(left.depth, right.depth) + 1;

        rebalance();
    }

    /** Restores the depth invariant (children differ by at most 1) or just refreshes {@code depth}. */
    private void rebalance() {
        int l = left.depth();
        int r = right.depth();
        if (l > r + 1) {
            if (left.left.depth() > left.right.depth()) {
                rotate(left.left.left, left.left.right, left.right, right);
            } else {
                rotate(left.left, left.right.left, left.right.right, right);
            }
        } else if (r > l + 1) {
            if (right.left.depth() > right.right.depth()) {
                rotate(left, right.left.left, right.left.right, right.right);
            } else {
                rotate(left, right.left, right.right.left, right.right.right);
            }
        } else {
            depth = Math.max(left.depth(), right.depth()) + 1;
        }
    }

    /** Rebuilds this node as ((a, b), (c, d)) and recomputes its aggregates. */
    private void rotate(GroupTree a, GroupTree b, GroupTree c, GroupTree d) {
        left = new GroupTree(a, b);
        right = new GroupTree(c, d);
        count = left.count + right.count;
        size = left.size + right.size;
        depth = Math.max(left.depth(), right.depth()) + 1;
        leaf = right.first();
    }

    private int depth() {
        return depth;
    }

    /** @return the number of groups in this tree */
    public int size() {
        return size;
    }

    /**
     * @return the number of items strictly before the current element
     */
    public int headCount(TDigest.Group base) {
        if (size == 0) {
            return 0;
        } else if (left == null) {
            return leaf.compareTo(base) < 0 ? 1 : 0;
        } else {
            if (base.compareTo(leaf) < 0) {
                return left.headCount(base);
            } else {
                return left.size + right.headCount(base);
            }
        }
    }

    /**
     * @return the sum of the size() function for all elements strictly before the current element.
     */
    public int headSum(TDigest.Group base) {
        if (size == 0) {
            return 0;
        } else if (left == null) {
            return leaf.compareTo(base) < 0 ? count : 0;
        } else {
            if (base.compareTo(leaf) <= 0) {
                return left.headSum(base);
            } else {
                return left.count + right.headSum(base);
            }
        }
    }

    /**
     * @return the first Group in this set
     */
    public TDigest.Group first() {
        Preconditions.checkState(size > 0, "No first element of empty set");
        if (left == null) {
            return leaf;
        } else {
            return left.first();
        }
    }

    /**
     * Iterates through all groups in the tree.
     */
    @Override
    public Iterator<TDigest.Group> iterator() {
        return iterator(null);
    }

    /**
     * Iterates through all of the Groups in this tree in ascending order of means
     *
     * @param start The place to start this subset.  Remember that Groups are ordered by mean *and* id.
     * @return An iterator that goes through the groups in order of mean and id starting at or after the
     *         specified Group.
     */
    private Iterator<TDigest.Group> iterator(final TDigest.Group start) {
        return new AbstractIterator<TDigest.Group>() {
            {
                stack = new ArrayDeque<GroupTree>();
                push(GroupTree.this, start);
            }

            Deque<GroupTree> stack;

            // recurses down to the leaf that is >= start
            // pending right hand branches on the way are put on the stack
            private void push(GroupTree z, TDigest.Group start) {
                while (z.left != null) {
                    if (start == null || start.compareTo(z.leaf) < 0) {
                        // remember we will have to process the right hand branch later
                        stack.push(z.right);
                        // note that there is no guarantee that z.left has any good data
                        z = z.left;
                    } else {
                        // if the left hand branch doesn't contain start, then no push
                        z = z.right;
                    }
                }
                // put the leaf value on the stack if it is valid
                if (start == null || z.leaf.compareTo(start) >= 0) {
                    stack.push(z);
                }
            }

            @Override
            protected TDigest.Group computeNext() {
                GroupTree r = stack.poll();
                while (r != null && r.left != null) {
                    // unpack r onto the stack
                    push(r, start);
                    r = stack.poll();
                }

                // at this point, r == null or r.left == null
                // if r == null, stack is empty and we are done
                // if r != null, then r.left == null (r is a leaf) and we have a result
                if (r != null) {
                    return r.leaf;
                }
                return endOfData();
            }
        };
    }

    /**
     * Removes a group from the tree.
     *
     * NOTE(review): when the subtree on the side of {@code base} holds a single
     * element, that element is dropped without comparing it to {@code base}.
     */
    public void remove(TDigest.Group base) {
        Preconditions.checkState(size > 0, "Cannot remove from empty set");
        if (size == 1) {
            Preconditions.checkArgument(base.compareTo(leaf) == 0, "Element %s not found", base);
            count = size = 0;
            leaf = null;
        } else {
            if (base.compareTo(leaf) < 0) {
                if (left.size > 1) {
                    left.remove(base);
                    count -= base.count();
                    size--;
                    rebalance();
                } else {
                    // left child disappears: this node takes over the right child's contents
                    size = right.size;
                    count = right.count;
                    depth = right.depth;
                    leaf = right.leaf;
                    left = right.left;
                    right = right.right;
                }
            } else {
                if (right.size > 1) {
                    right.remove(base);
                    leaf = right.first();
                    count -= base.count();
                    size--;
                    rebalance();
                } else {
                    // right child disappears: this node takes over the left child's contents
                    size = left.size;
                    count = left.count;
                    depth = left.depth;
                    leaf = left.leaf;
                    // read left.right before overwriting left
                    right = left.right;
                    left = left.left;
                }
            }
        }
    }

    /**
     * @return the largest element less than or equal to base
     */
    public TDigest.Group floor(TDigest.Group base) {
        if (size == 0) {
            return null;
        } else {
            if (size == 1) {
                return base.compareTo(leaf) >= 0 ? leaf : null;
            } else {
                if (base.compareTo(leaf) < 0) {
                    return left.floor(base);
                } else {
                    TDigest.Group floor = right.floor(base);
                    if (floor == null) {
                        floor = left.last();
                    }
                    return floor;
                }
            }
        }
    }

    /** @return the last Group in this set */
    public TDigest.Group last() {
        Preconditions.checkState(size > 0, "Cannot find last element of empty set");
        if (size == 1) {
            return leaf;
        } else {
            return right.last();
        }
    }

    /**
     * @return the smallest element greater than or equal to base.
     */
    public TDigest.Group ceiling(TDigest.Group base) {
        if (size == 0) {
            return null;
        } else if (size == 1) {
            return base.compareTo(leaf) <= 0 ? leaf : null;
        } else {
            if (base.compareTo(leaf) < 0) {
                TDigest.Group r = left.ceiling(base);
                if (r == null) {
                    r = right.first();
                }
                return r;
            } else {
                return right.ceiling(base);
            }
        }
    }

    /**
     * @return the subset of elements equal to or greater than base.
     */
    public Iterable<TDigest.Group> tailSet(final TDigest.Group start) {
        return new Iterable<TDigest.Group>() {
            @Override
            public Iterator<TDigest.Group> iterator() {
                return GroupTree.this.iterator(start);
            }
        };
    }

    /** @return the sum of count() over all groups in the tree */
    public int sum() {
        return count;
    }

    /**
     * Recursively verifies balance, depth, size, count and split-key invariants.
     *
     * @throws IllegalStateException if any invariant is violated
     */
    public void checkBalance() {
        if (left != null) {
            Preconditions.checkState(Math.abs(left.depth() - right.depth()) < 2, "Imbalanced");
            int l = left.depth();
            int r = right.depth();
            Preconditions.checkState(depth == Math.max(l, r) + 1, "Depth doesn't match children");
            Preconditions.checkState(size == left.size + right.size, "Sizes don't match children");
            Preconditions.checkState(count == left.count + right.count, "Counts don't match children");
            // %s rather than %.5d: a precision on %d is rejected by java.util.Formatter
            Preconditions.checkState(leaf.compareTo(right.first()) == 0, "Split is wrong %s != %s or %s != %s", leaf.mean(), right.first().mean(), leaf.id(), right.first().id());
            left.checkBalance();
            right.checkBalance();
        }
    }

    /**
     * Dumps the tree to standard output, one node per line, indented by depth.
     * A leading "* " marks an imbalanced node and "+ " a node whose split key
     * differs from the first element of its right subtree.
     */
    public void print(int depth) {
        for (int i = 0; i < depth; i++) {
            System.out.printf("| ");
        }
        int imbalance = Math.abs((left != null ? left.depth : 1) - (right != null ? right.depth : 1));
        System.out.printf("%s%s, %d, %d, %d\n", (imbalance > 1 ? "* " : "") + (right != null && leaf.compareTo(right.first()) != 0 ? "+ " : ""), leaf, size, count, this.depth);
        if (left != null) {
            left.print(depth + 1);
            right.print(depth + 1);
        }
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/quantile/IQuantileEstimator.java 0000664 0000000 0000000 00000000232 13531322035 0033040 0 ustar 00root root 0000000 0000000 package com.clearspring.analytics.stream.quantile;
/**
 * Contract for a streaming quantile estimator over {@code long} values.
 */
public interface IQuantileEstimator {
/** Feeds one observed value into the estimator. */
void offer(long value);
/**
 * @param q the requested quantile -- presumably a fraction in [0, 1]; confirm with implementations
 * @return the estimated value at quantile {@code q}
 */
long getQuantile(double q);
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/quantile/QDigest.java 0000664 0000000 0000000 00000027700 13531322035 0030626 0 ustar 00root root 0000000 0000000 package com.clearspring.analytics.stream.quantile;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import it.unimi.dsi.fastutil.Hash;
import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongArrayFIFOQueue;
/**
* Q-Digest datastructure.
*
* Answers approximate quantile queries: actual rank of the result of query(q)
* is in q-eps .. q+eps, where eps = log(sigma)/compressionFactor
* and log(sigma) is ceiling of binary log of the largest value inserted,
* i.e. height of the tree.
*
* Two Q-Digests can be joined (see {@link #unionOf(QDigest, QDigest)}).
*
* Source:
* N.Shrivastava, C.Buragohain, D.Agrawal
* Medians and Beyond: New Aggregation Techniques for Sensor Networks
* http://www.cs.virginia.edu/~son/cs851/papers/ucsb.sensys04.pdf
*
* This is a slightly modified version.
* There is a small problem with the compression algorithm in the paper,
* see https://plus.google.com/u/0/109909935680879695595/posts/768ZZ9Euqz6
*
* So we use a different algorithm here:
*
* When an item is inserted, we compress along the path to root from the item's leaf
* When the structure becomes too large (above the theoretical bound), or
* at "too destructive" operations (e.g. union or rebuild) we compress fully
*
*
* Note that the accuracy of the structure does NOT suffer if "property 2"
* from the paper is violated (in fact, restoring property 2 at any node
* decreases accuracy).
*
* So we can say that we preserve the paper's accuracy and memory consumption claims.
*/
public class QDigest implements IQuantileEstimator {

    /**
     * Orders {@code {left, right, count}} triples by ascending right endpoint; among ranges
     * sharing a right endpoint the narrower one sorts first.
     */
    private static final Comparator<long[]> RANGES_COMPARATOR = new Comparator<long[]>() {
        @Override
        public int compare(long[] ra, long[] rb) {
            long rightA = ra[1], rightB = rb[1], sizeA = ra[1] - ra[0], sizeB = rb[1] - rb[0];
            if (rightA < rightB) {
                return -1;
            }
            if (rightA > rightB) {
                return 1;
            }
            if (sizeA < sizeB) {
                return -1;
            }
            if (sizeA > sizeB) {
                return 1;
            }
            return 0;
        }
    };

    private static final int MAP_INITIAL_SIZE = Hash.DEFAULT_INITIAL_SIZE;
    private static final float MAP_LOAD_FACTOR = Hash.VERY_FAST_LOAD_FACTOR;

    /** Number of values represented: incremented by {@link #offer(long)}, summed by a union. */
    private long size;
    /** Number of leaves of the implicit binary tree; the leaf for value {@code x} has id {@code capacity + x}. */
    private long capacity = 1;
    private double compressionFactor;
    /** Sparse tree: node id (root is 1, children of {@code i} are {@code 2i} and {@code 2i + 1}) to its count. */
    private Long2LongOpenHashMap node2count = new Long2LongOpenHashMap(MAP_INITIAL_SIZE, MAP_LOAD_FACTOR);

    /**
     * Creates an empty digest.
     *
     * @param compressionFactor divisor applied to the current size to obtain the per-node
     *                          compression threshold; also bounds the node count (see {@link #offer(long)})
     */
    public QDigest(double compressionFactor) {
        this.compressionFactor = compressionFactor;
    }

    private long value2leaf(long x) {
        return capacity + x;
    }

    private long leaf2value(long id) {
        return id - capacity;
    }

    private boolean isRoot(long id) {
        return id == 1;
    }

    private boolean isLeaf(long id) {
        return id >= capacity;
    }

    private long sibling(long id) {
        return (id % 2 == 0) ? (id + 1) : (id - 1);
    }

    private long parent(long id) {
        return id / 2;
    }

    private long leftChild(long id) {
        return 2 * id;
    }

    private long rightChild(long id) {
        return 2 * id + 1;
    }

    /** Smallest value covered by the subtree rooted at {@code id} (follows left children down to a leaf). */
    private long rangeLeft(long id) {
        while (!isLeaf(id)) {
            id = leftChild(id);
        }
        return leaf2value(id);
    }

    /** Largest value covered by the subtree rooted at {@code id} (follows right children down to a leaf). */
    private long rangeRight(long id) {
        while (!isLeaf(id)) {
            id = rightChild(id);
        }
        return leaf2value(id);
    }

    /**
     * Records one occurrence of {@code value}, growing the tree first if the value does not fit.
     *
     * @param value a value in {@code 0 .. Long.MAX_VALUE / 2}
     * @throws IllegalArgumentException if {@code value} is outside that range
     */
    @Override
    public void offer(long value) {
        if (value < 0 || value > Long.MAX_VALUE / 2) {
            throw new IllegalArgumentException("Can only accept values in the range 0.." + Long.MAX_VALUE / 2 + ", got " + value);
        }
        // Rebuild if the value is too large for the current tree height
        if (value >= capacity) {
            rebuildToCapacity(Long.highestOneBit(value) << 1);
        }

        long leaf = value2leaf(value);
        node2count.addTo(leaf, 1);
        size++;
        // Always compress at the inserted node, and recompress fully
        // if the tree becomes too large.
        // This is one sensible strategy which both is fast and keeps
        // the tree reasonably small (within the theoretical bound of 3k nodes)
        compressUpward(leaf);
        if (node2count.size() > 3 * compressionFactor) {
            compressFully();
        }
    }

    /**
     * Builds a new digest holding the combined contents of {@code a} and {@code b}; neither
     * argument is modified.
     *
     * @throws IllegalArgumentException if the two digests have different compression factors
     */
    public static QDigest unionOf(QDigest a, QDigest b) {
        if (a.compressionFactor != b.compressionFactor) {
            throw new IllegalArgumentException(
                    "Compression factors must be the same: " +
                    "left is " + a.compressionFactor + ", " +
                    "right is " + b.compressionFactor);
        }
        // Normalise so that 'a' is the digest with the smaller (or equal) capacity.
        if (a.capacity > b.capacity) {
            return unionOf(b, a);
        }

        QDigest res = new QDigest(a.compressionFactor);
        res.capacity = a.capacity;
        res.size = a.size + b.size;
        for (long k : a.node2count.keySet()) {
            res.node2count.put(k, a.node2count.get(k));
        }

        // Bring the copy of 'a' into b's node numbering before adding b's counts.
        if (b.capacity > res.capacity) {
            res.rebuildToCapacity(b.capacity);
        }

        for (long k : b.node2count.keySet()) {
            res.node2count.put(k, b.get(k) + res.get(k));
        }

        res.compressFully();

        return res;
    }

    /** Re-numbers every stored node for a tree with {@code newCapacity} leaves, then compresses fully. */
    private void rebuildToCapacity(long newCapacity) {
        Long2LongOpenHashMap newNode2count = new Long2LongOpenHashMap(MAP_INITIAL_SIZE, MAP_LOAD_FACTOR);
        // rebuild to newLogCapacity.
        // This means that our current tree becomes a leftmost subtree
        // of the new tree.
        // E.g. when rebuilding a tree with logCapacity = 2
        // (i.e. storing values in 0..3) to logCapacity = 5 (i.e. 0..31):
        // node 1 => 8 (+= 7 = 2^0*(2^3-1))
        // nodes 2..3 => 16..17 (+= 14 = 2^1*(2^3-1))
        // nodes 4..7 => 32..35 (+= 28 = 2^2*(2^3-1))
        // This is easy to see if you draw it on paper.
        // Process the keys by "layers" in the original tree.
        long scaleR = newCapacity / capacity - 1;
        Long[] keys = node2count.keySet().toArray(new Long[node2count.size()]);
        Arrays.sort(keys);
        long scaleL = 1;
        for (long k : keys) {
            // keys are ascending, so scaleL only ever grows: it tracks the first id of k's layer
            while (scaleL <= k / 2) {
                scaleL <<= 1;
            }
            newNode2count.put(k + scaleL * scaleR, node2count.get(k));
        }
        node2count = newNode2count;
        capacity = newCapacity;
        compressFully();
    }

    private void compressFully() {
        // Restore property 2 at each node.
        // Iterate over a snapshot of the keys because compressDownward mutates the map.
        Long[] allNodes = node2count.keySet().toArray(new Long[node2count.size()]);
        for (long node : allNodes) {
            // The root node is not compressible: it has no parent and no sibling
            if (!isRoot(node)) {
                compressDownward(node);
            }
        }
    }

    /**
     * Restore P2 at node and upward the spine. Note that P2 can vanish
     * at some nodes sideways as a result of this. We'll fix that later
     * in compressFully when needed.
     */
    private void compressUpward(long node) {
        double threshold = Math.floor(size / compressionFactor);
        long atNode = get(node);
        while (!isRoot(node)) {
            if (atNode > threshold) {
                break;
            }
            long atSibling = get(sibling(node));
            if (atNode + atSibling > threshold) {
                break;
            }
            long atParent = get(parent(node));
            if (atNode + atSibling + atParent > threshold) {
                break;
            }

            // Fold this node and its sibling into the parent, then continue from the parent.
            node2count.addTo(parent(node), atNode + atSibling);
            node2count.remove(node);
            if (atSibling > 0) {
                node2count.remove(sibling(node));
            }
            node = parent(node);
            atNode = atParent + atNode + atSibling;
        }
    }

    /**
     * Restore P2 at seedNode and guarantee that no new violations of P2 appeared.
     */
    private void compressDownward(long seedNode) {
        double threshold = Math.floor(size / compressionFactor);
        // P2 check same as above but shorter and slower (and invoked rarely)
        LongArrayFIFOQueue q = new LongArrayFIFOQueue();
        q.enqueue(seedNode);
        while (!q.isEmpty()) {
            long node = q.dequeueLong();
            long atNode = get(node);
            long atSibling = get(sibling(node));
            if (atNode == 0 && atSibling == 0) {
                continue;
            }
            long atParent = get(parent(node));
            if (atParent + atNode + atSibling > threshold) {
                continue;
            }
            node2count.addTo(parent(node), atNode + atSibling);
            node2count.remove(node);
            node2count.remove(sibling(node));
            // Now P2 could have vanished at the node's and sibling's subtrees since they decreased.
            if (!isLeaf(node)) {
                // One child per pair is enough: each dequeued node is examined together with its sibling.
                q.enqueue(leftChild(node));
                q.enqueue(leftChild(sibling(node)));
            }
        }
    }

    private long get(long node) {
        return node2count.get(node);
    }

    /**
     * Walks the stored ranges in ascending order, accumulating their counts, and returns the
     * right endpoint of the first range at which the running total exceeds {@code q * size}.
     * If no range qualifies, the right endpoint of the last range is returned.
     *
     * @param q the requested quantile
     * @return the right endpoint of the selected range
     * @throws IndexOutOfBoundsException if the digest holds no nodes
     */
    @Override
    public long getQuantile(double q) {
        List<long[]> ranges = toAscRanges();
        long s = 0;
        for (long[] r : ranges) {
            s += r[2];
            if (s > q * size) {
                return r[1];
            }
        }
        return ranges.get(ranges.size() - 1)[1];
    }

    /**
     * Returns one {@code {left, right, count}} triple per stored node, sorted by right endpoint
     * and then by range width.
     */
    public List<long[]> toAscRanges() {
        List<long[]> ranges = new ArrayList<long[]>();
        for (long key : node2count.keySet()) {
            ranges.add(new long[]{rangeLeft(key), rangeRight(key), node2count.get(key)});
        }

        Collections.sort(ranges, RANGES_COMPARATOR);
        return ranges;
    }

    /** Renders the sorted ranges as {@code "left .. right: count"} entries separated by {@code ", "}. */
    @Override
    public String toString() {
        List<long[]> ranges = toAscRanges();
        StringBuilder res = new StringBuilder();
        for (long[] range : ranges) {
            if (res.length() > 0) {
                res.append(", ");
            }
            res.append(range[0]).append(" .. ").append(range[1]).append(": ").append(range[2]);
        }
        return res.toString();
    }

    /**
     * Encodes a digest as: size (long), compression factor (double), capacity (long),
     * node count (int), followed by one (node id, count) pair of longs per node.
     */
    public static byte[] serialize(QDigest d) {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream s = new DataOutputStream(bos);
        try {
            s.writeLong(d.size);
            s.writeDouble(d.compressionFactor);
            s.writeLong(d.capacity);
            s.writeInt(d.node2count.size());
            for (long k : d.node2count.keySet()) {
                s.writeLong(k);
                s.writeLong(d.node2count.get(k));
            }
            s.close();
            return bos.toByteArray();
        } catch (IOException e) {
            // Should never happen
            throw new RuntimeException(e);
        }
    }

    /**
     * Rebuilds a digest from the layout written by {@link #serialize(QDigest)}.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the bytes cannot be read in that layout
     */
    public static QDigest deserialize(byte[] b) {
        ByteArrayInputStream bis = new ByteArrayInputStream(b);
        DataInputStream s = new DataInputStream(bis);
        try {
            long size = s.readLong();
            double compressionFactor = s.readDouble();
            long capacity = s.readLong();
            int count = s.readInt();
            QDigest d = new QDigest(compressionFactor);
            d.size = size;
            d.capacity = capacity;
            for (int i = 0; i < count; ++i) {
                long k = s.readLong();
                long n = s.readLong();
                d.node2count.put(k, n);
            }
            return d;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    // For debugging purposes.
    /** Sums the counts stored at every node. */
    public long computeActualSize() {
        long res = 0;
        for (long x : node2count.values()) {
            res += x;
        }
        return res;
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/stream/quantile/TDigest.java 0000664 0000000 0000000 00000043051 13531322035 0030626 0 ustar 00root root 0000000 0000000 /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.quantile;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import java.nio.ByteBuffer;
import com.clearspring.analytics.util.Lists;
import com.clearspring.analytics.util.Preconditions;
/**
* Adaptive histogram based on something like streaming k-means crossed with Q-digest.
*
* The special characteristics of this algorithm are:
*
* a) smaller summaries than Q-digest
*
* b) works on doubles as well as integers.
*
* c) provides part per million accuracy for extreme quantiles and typically <1000 ppm accuracy for middle quantiles
*
* d) fast
*
* e) simple
*
* f) test coverage > 90%
*
* g) easy to adapt for use with map-reduce
*/
public class TDigest {
private Random gen;
private double compression = 100;
private GroupTree summary = new GroupTree();
private int count = 0;
private boolean recordAllData = false;
/**
* A histogram structure that will record a sketch of a distribution.
*
* @param compression How should accuracy be traded for size? A value of N here will give quantile errors
* almost always less than 3/N with considerably smaller errors expected for extreme
* quantiles. Conversely, you should expect to track about 5 N centroids for this
* accuracy.
*/
public TDigest(double compression) {
this(compression, new Random());
}
public TDigest(double compression, Random random) {
this.compression = compression;
gen = random;
}
/**
* Adds a sample to a histogram.
*
* @param x The value to add.
*/
public void add(double x) {
add(x, 1);
}
/**
* Adds a sample to a histogram.
*
* @param x The value to add.
* @param w The weight of this point.
*/
public void add(double x, int w) {
// note that because of a zero id, this will be sorted *before* any existing Group with the same mean
Group base = createGroup(x, 0);
add(x, w, base);
}
private void add(double x, int w, Group base) {
Group start = summary.floor(base);
if (start == null) {
start = summary.ceiling(base);
}
if (start == null) {
summary.add(Group.createWeighted(x, w, base.data()));
count = w;
} else {
Iterable neighbors = summary.tailSet(start);
double minDistance = Double.MAX_VALUE;
int lastNeighbor = 0;
int i = summary.headCount(start);
for (Group neighbor : neighbors) {
double z = Math.abs(neighbor.mean() - x);
if (z <= minDistance) {
minDistance = z;
lastNeighbor = i;
} else {
break;
}
i++;
}
Group closest = null;
int sum = summary.headSum(start);
i = summary.headCount(start);
double n = 1;
for (Group neighbor : neighbors) {
if (i > lastNeighbor) {
break;
}
double z = Math.abs(neighbor.mean() - x);
double q = (sum + neighbor.count() / 2.0) / count;
double k = 4 * count * q * (1 - q) / compression;
// this slightly clever selection method improves accuracy with lots of repeated points
if (z == minDistance && neighbor.count() + w <= k) {
if (gen.nextDouble() < 1 / n) {
closest = neighbor;
}
n++;
}
sum += neighbor.count();
i++;
}
if (closest == null) {
summary.add(Group.createWeighted(x, w, base.data()));
} else {
summary.remove(closest);
closest.add(x, w, base.data());
summary.add(closest);
}
count += w;
if (summary.size() > 100 * compression) {
// something such as sequential ordering of data points
// has caused a pathological expansion of our summary.
// To fight this, we simply replay the current centroids
// in random order.
// this causes us to forget the diagnostic recording of data points
compress();
}
}
}
public void add(TDigest other) {
List tmp = Lists.newArrayList(other.summary);
Collections.shuffle(tmp, gen);
for (Group group : tmp) {
add(group.mean(), group.count(), group);
}
}
public static TDigest merge(double compression, Iterable subData) {
Preconditions.checkArgument(subData.iterator().hasNext(), "Can't merge 0 digests");
List elements = Lists.newArrayList(subData);
int n = Math.max(1, elements.size() / 4);
TDigest r = new TDigest(compression, elements.get(0).gen);
if (elements.get(0).recordAllData) {
r.recordAllData();
}
for (int i = 0; i < elements.size(); i += n) {
if (n > 1) {
r.add(merge(compression, elements.subList(i, Math.min(i + n, elements.size()))));
} else {
r.add(elements.get(i));
}
}
return r;
}
public void compress() {
compress(summary);
}
private void compress(GroupTree other) {
TDigest reduced = new TDigest(compression, gen);
if (recordAllData) {
reduced.recordAllData();
}
List tmp = Lists.newArrayList(other);
Collections.shuffle(tmp, gen);
for (Group group : tmp) {
reduced.add(group.mean(), group.count(), group);
}
summary = reduced.summary;
}
/**
* Returns the number of samples represented in this histogram. If you want to know how many
* centroids are being used, try centroids().size().
*
* @return the number of samples that have been added.
*/
public int size() {
return count;
}
/**
* @param x the value at which the CDF should be evaluated
* @return the approximate fraction of all samples that were less than or equal to x.
*/
public double cdf(double x) {
GroupTree values = summary;
if (values.size() == 0) {
return Double.NaN;
} else if (values.size() == 1) {
return x < values.first().mean() ? 0 : 1;
} else {
double r = 0;
// we scan a across the centroids
Iterator it = values.iterator();
Group a = it.next();
// b is the look-ahead to the next centroid
Group b = it.next();
// initially, we set left width equal to right width
double left = (b.mean() - a.mean()) / 2;
double right = left;
// scan to next to last element
while (it.hasNext()) {
if (x < a.mean() + right) {
return (r + a.count() * interpolate(x, a.mean() - left, a.mean() + right)) / count;
}
r += a.count();
a = b;
b = it.next();
left = right;
right = (b.mean() - a.mean()) / 2;
}
// for the last element, assume right width is same as left
left = right;
a = b;
if (x < a.mean() + right) {
return (r + a.count() * interpolate(x, a.mean() - left, a.mean() + right)) / count;
} else {
return 1;
}
}
}
/**
* @param q The quantile desired. Can be in the range [0,1].
* @return The minimum value x such that we think that the proportion of samples is <= x is q.
*/
public double quantile(double q) {
GroupTree values = summary;
Preconditions.checkArgument(values.size() > 1);
Iterator it = values.iterator();
Group center = it.next();
Group leading = it.next();
if (!it.hasNext()) {
// only two centroids because of size limits
// both a and b have to have just a single element
double diff = (leading.mean() - center.mean()) / 2;
if (q > 0.75) {
return leading.mean() + diff * (4 * q - 3);
} else {
return center.mean() + diff * (4 * q - 1);
}
} else {
q *= count;
double right = (leading.mean() - center.mean()) / 2;
// we have nothing else to go on so make left hanging width same as right to start
double left = right;
double t = center.count();
while (it.hasNext()) {
if (t + center.count() / 2 >= q) {
// left side of center
return center.mean() - left * 2 * (q - t) / center.count();
} else if (t + leading.count() >= q) {
// right of b but left of the left-most thing beyond
return center.mean() + right * 2.0 * (center.count() - (q - t)) / center.count();
}
t += center.count();
center = leading;
leading = it.next();
left = right;
right = (leading.mean() - center.mean()) / 2;
}
// ran out of data ... assume final width is symmetrical
center = leading;
left = right;
if (t + center.count() / 2 >= q) {
// left side of center
return center.mean() - left * 2 * (q - t) / center.count();
} else if (t + leading.count() >= q) {
// right of center but left of leading
return center.mean() + right * 2.0 * (center.count() - (q - t)) / center.count();
} else {
// shouldn't be possible
return 1;
}
}
}
public int centroidCount() {
return summary.size();
}
public Iterable extends Group> centroids() {
return summary;
}
public double compression() {
return compression;
}
/**
* Sets up so that all centroids will record all data assigned to them. For testing only, really.
*/
public TDigest recordAllData() {
recordAllData = true;
return this;
}
/**
* Returns an upper bound on the number bytes that will be required to represent this histogram.
*/
public int byteSize() {
return 4 + 8 + 4 + summary.size() * 12;
}
/**
* Returns an upper bound on the number of bytes that will be required to represent this histogram in
* the tighter representation.
*/
public int smallByteSize() {
int bound = byteSize();
ByteBuffer buf = ByteBuffer.allocate(bound);
asSmallBytes(buf);
return buf.position();
}
public final static int VERBOSE_ENCODING = 1;
public final static int SMALL_ENCODING = 2;
/**
* Outputs a histogram as bytes using a particularly cheesy encoding.
*/
public void asBytes(ByteBuffer buf) {
buf.putInt(VERBOSE_ENCODING);
buf.putDouble(compression());
buf.putInt(summary.size());
for (Group group : summary) {
buf.putDouble(group.mean());
}
for (Group group : summary) {
buf.putInt(group.count());
}
}
public void asSmallBytes(ByteBuffer buf) {
buf.putInt(SMALL_ENCODING);
buf.putDouble(compression());
buf.putInt(summary.size());
double x = 0;
for (Group group : summary) {
double delta = group.mean() - x;
x = group.mean();
buf.putFloat((float) delta);
}
for (Group group : summary) {
int n = group.count();
encode(buf, n);
}
}
public static void encode(ByteBuffer buf, int n) {
int k = 0;
while (n < 0 || n > 0x7f) {
byte b = (byte) (0x80 | (0x7f & n));
buf.put(b);
n = n >>> 7;
k++;
Preconditions.checkState(k < 6);
}
buf.put((byte) n);
}
public static int decode(ByteBuffer buf) {
int v = buf.get();
int z = 0x7f & v;
int shift = 7;
while ((v & 0x80) != 0) {
Preconditions.checkState(shift <= 28);
v = buf.get();
z += (v & 0x7f) << shift;
shift += 7;
}
return z;
}
/**
* Reads a histogram from a byte buffer
*
* @return The new histogram structure
*/
public static TDigest fromBytes(ByteBuffer buf) {
int encoding = buf.getInt();
if (encoding == VERBOSE_ENCODING) {
double compression = buf.getDouble();
TDigest r = new TDigest(compression);
int n = buf.getInt();
double[] means = new double[n];
for (int i = 0; i < n; i++) {
means[i] = buf.getDouble();
}
for (int i = 0; i < n; i++) {
r.add(means[i], buf.getInt());
}
return r;
} else if (encoding == SMALL_ENCODING) {
double compression = buf.getDouble();
TDigest r = new TDigest(compression);
int n = buf.getInt();
double[] means = new double[n];
double x = 0;
for (int i = 0; i < n; i++) {
double delta = buf.getFloat();
x += delta;
means[i] = x;
}
for (int i = 0; i < n; i++) {
int z = decode(buf);
r.add(means[i], z);
}
return r;
} else {
throw new IllegalStateException("Invalid format for serialized histogram");
}
}
private Group createGroup(double mean, int id) {
return new Group(mean, id, recordAllData);
}
private double interpolate(double x, double x0, double x1) {
return (x - x0) / (x1 - x0);
}
public static class Group implements Comparable {
private static final AtomicInteger uniqueCount = new AtomicInteger(1);
private double centroid = 0;
private int count = 0;
private int id;
private List actualData = null;
private Group(boolean record) {
id = uniqueCount.incrementAndGet();
if (record) {
actualData = Lists.newArrayList();
}
}
public Group(double x) {
this(false);
start(x, uniqueCount.getAndIncrement());
}
public Group(double x, int id) {
this(false);
start(x, id);
}
public Group(double x, int id, boolean record) {
this(record);
start(x, id);
}
private void start(double x, int id) {
this.id = id;
add(x, 1);
}
public void add(double x, int w) {
if (actualData != null) {
actualData.add(x);
}
count += w;
centroid += w * (x - centroid) / count;
}
public double mean() {
return centroid;
}
public int count() {
return count;
}
public int id() {
return id;
}
@Override
public String toString() {
return "Group{" +
"centroid=" + centroid +
", count=" + count +
'}';
}
@Override
public int hashCode() {
return id;
}
@Override
public int compareTo(Group o) {
int r = Double.compare(centroid, o.centroid);
if (r == 0) {
r = id - o.id;
}
return r;
}
public Iterable extends Double> data() {
return actualData;
}
public static Group createWeighted(double x, int w, Iterable extends Double> data) {
Group r = new Group(data != null);
r.add(x, w, data);
return r;
}
private void add(double x, int w, Iterable extends Double> data) {
if (actualData != null) {
if (data != null) {
for (Double old : data) {
actualData.add(old);
}
} else {
actualData.add(x);
}
}
count += w;
centroid += w * (x - centroid) / count;
}
}
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/ 0000775 0000000 0000000 00000000000 13531322035 0024255 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/AbstractIterator.java 0000664 0000000 0000000 00000004416 13531322035 0030402 0 ustar 00root root 0000000 0000000 /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.util;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
* Rough and ready clone of the Guava AbstractIterator. I just did this
* to avoid needing to add the guava dependency. It would be better to
* just use guava.
*/
public abstract class AbstractIterator<T> implements Iterator<T> {

    private enum State {
        NOT_STARTED, DONE, HAS_DATA, EMPTY
    }

    /** The element fetched by the last {@link #computeNext()} call and not yet handed out. */
    private T next;
    private State currentState = State.NOT_STARTED;

    /**
     * Fetches the next element via {@link #computeNext()} if none is buffered.
     *
     * @return {@code false} once {@link #endOfData()} has been called, {@code true} otherwise
     */
    @Override
    public boolean hasNext() {
        switch (currentState) {
            case DONE:
                return false;
            case HAS_DATA:
                return true;
            case NOT_STARTED:
            case EMPTY:
            default:
                // Assume success first; computeNext() flips the state to DONE through endOfData().
                currentState = State.HAS_DATA;
                next = computeNext();
                return currentState != State.DONE;
        }
    }

    /**
     * @return the buffered element
     * @throws NoSuchElementException if the iteration is finished
     */
    @Override
    public T next() {
        if (hasNext()) {
            T r = next;
            // drop our reference so the element is not retained after being handed out
            next = null;
            currentState = State.EMPTY;
            return r;
        } else {
            throw new NoSuchElementException();
        }
    }

    /**
     * Always throws; removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("Can't remove from an abstract iterator");
    }

    /**
     * Produces the next element, or signals exhaustion by calling (and typically returning the
     * result of) {@link #endOfData()}.
     */
    protected abstract T computeNext();

    /**
     * Marks the iteration as finished.
     *
     * @return always {@code null}, so implementations can write {@code return endOfData();}
     */
    public T endOfData() {
        currentState = State.DONE;
        return null;
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/Bits.java 0000664 0000000 0000000 00000003037 13531322035 0026024 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.util;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
/**
 * Helpers that turn big-endian byte data into arrays of 32-bit integers.
 */
public class Bits {

    /**
     * Decodes a byte array into ints, four bytes per int; up to three trailing bytes that do
     * not form a complete int are ignored.
     *
     * @param mBytes the bytes to decode
     * @return an array of {@code mBytes.length / 4} ints
     * @throws IOException if reading from the wrapped stream fails
     */
    public static int[] getBits(byte[] mBytes) throws IOException {
        DataInput wrapped = new DataInputStream(new ByteArrayInputStream(mBytes));
        return getBits(wrapped, mBytes.length);
    }

    /**
     * This method might be better described as
     * "byte array to int array" or "data input to int array"
     *
     * @param dataIn     source the ints are read from
     * @param byteLength number of bytes to consume, rounded down to a multiple of four
     * @return an array of {@code byteLength / 4} ints
     * @throws IOException if {@code dataIn} fails or runs out of data
     */
    public static int[] getBits(DataInput dataIn, int byteLength) throws IOException {
        final int[] words = new int[byteLength / 4];
        int idx = 0;
        while (idx < words.length) {
            words[idx] = dataIn.readInt();
            idx++;
        }
        return words;
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/DoublyLinkedList.java 0000664 0000000 0000000 00000010066 13531322035 0030344 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.util;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
/**
 * A minimal doubly linked list that exposes its nodes so callers can insert after, or remove,
 * a known node directly. Iteration runs from {@code tail} towards {@code head} by following
 * {@code next} links.
 */
public class DoublyLinkedList<T> implements Iterable<T> {

    protected int size;
    /** First node in iteration order. */
    protected ListNode2<T> tail;
    /** Last node in iteration order. */
    protected ListNode2<T> head;

    /**
     * Append to head of list
     *
     * @return the node created for {@code value}
     */
    public ListNode2<T> add(T value) {
        ListNode2<T> node = new ListNode2<T>(value);
        if (size++ == 0) {
            tail = node;
        } else {
            node.prev = head;
            head.next = node;
        }

        head = node;

        return node;
    }

    /**
     * Prepend to tail of list
     *
     * @return the node created for {@code value}
     */
    public ListNode2<T> enqueue(T value) {
        ListNode2<T> node = new ListNode2<T>(value);
        if (size++ == 0) {
            head = node;
        } else {
            node.next = tail;
            tail.prev = node;
        }

        tail = node;

        return node;
    }

    /**
     * Appends an existing node at the head, overwriting whatever links it carried before.
     */
    public void add(ListNode2<T> node) {
        node.prev = head;
        node.next = null;

        if (size++ == 0) {
            tail = node;
        } else {
            head.next = node;
        }

        head = node;
    }

    /**
     * Inserts a new node holding {@code value} right after {@code node}.
     *
     * @return the newly created node
     */
    public ListNode2<T> addAfter(ListNode2<T> node, T value) {
        ListNode2<T> newNode = new ListNode2<T>(value);
        addAfter(node, newNode);
        return newNode;
    }

    /**
     * Links {@code newNode} right after {@code node}; it becomes the head when {@code node} was the head.
     */
    public void addAfter(ListNode2<T> node, ListNode2<T> newNode) {
        newNode.next = node.next;
        newNode.prev = node;
        node.next = newNode;
        if (newNode.next == null) {
            head = newNode;
        } else {
            newNode.next.prev = newNode;
        }
        size++;
    }

    /**
     * Unlinks {@code node} from the list. The node's own {@code prev}/{@code next} fields are
     * left untouched.
     */
    public void remove(ListNode2<T> node) {
        if (node == tail) {
            tail = node.next;
        } else {
            node.prev.next = node.next;
        }

        if (node == head) {
            head = node.prev;
        } else {
            node.next.prev = node.prev;
        }
        size--;
    }

    public int size() {
        return size;
    }

    @Override
    public Iterator<T> iterator() {
        return new DoublyLinkedListIterator(this);
    }

    /**
     * Iterator from tail to head. Structural changes are detected only through a change in the
     * list's size since the iterator was created.
     */
    protected class DoublyLinkedListIterator implements Iterator<T> {

        protected DoublyLinkedList<T> list;
        protected ListNode2<T> itr;
        protected int length;

        public DoublyLinkedListIterator(DoublyLinkedList<T> list) {
            this.length = list.size;
            this.list = list;
            this.itr = list.tail;
        }

        @Override
        public boolean hasNext() {
            return itr != null;
        }

        /**
         * @throws ConcurrentModificationException  if the list size changed since this iterator was created
         * @throws java.util.NoSuchElementException if there are no more elements
         */
        @Override
        public T next() {
            if (length != list.size) {
                throw new ConcurrentModificationException();
            }
            // honour the Iterator contract instead of failing with a NullPointerException
            if (itr == null) {
                throw new java.util.NoSuchElementException();
            }
            T next = itr.value;
            itr = itr.next;
            return next;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

    }

    /** @return the value at the tail, or {@code null} if the list is empty */
    public T first() {
        return tail == null ? null : tail.getValue();
    }

    /** @return the value at the head, or {@code null} if the list is empty */
    public T last() {
        return head == null ? null : head.getValue();
    }

    public ListNode2<T> head() {
        return head;
    }

    public ListNode2<T> tail() {
        return tail;
    }

    public boolean isEmpty() {
        return size == 0;
    }

    /**
     * Copies the values, in iteration order, into a new array. The array's runtime type is
     * {@code Object[]}, so assigning the result to a more specific array type will fail.
     */
    @SuppressWarnings("unchecked")
    public T[] toArray() {
        T[] a = (T[]) new Object[size];
        int i = 0;
        for (T v : this) {
            a[i++] = v;
        }
        return a;
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/ExternalizableUtil.java 0000664 0000000 0000000 00000000770 13531322035 0030733 0 ustar 00root root 0000000 0000000 package com.clearspring.analytics.util;
import java.io.ByteArrayOutputStream;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectOutputStream;
/**
 * Helper for turning {@link Externalizable} objects into byte arrays.
 */
public class ExternalizableUtil {

    /**
     * Runs {@code o.writeExternal} against an in-memory {@link ObjectOutputStream} and returns
     * everything that reached the underlying buffer, including the stream header written by
     * the {@code ObjectOutputStream} itself.
     *
     * @param o the object to write
     * @return the bytes produced
     * @throws IOException if the stream cannot be created or {@code writeExternal} fails
     */
    public static byte[] toBytes(Externalizable o) throws IOException {
        final ByteArrayOutputStream sink = new ByteArrayOutputStream();
        final ObjectOutputStream objectOut = new ObjectOutputStream(sink);
        o.writeExternal(objectOut);
        // push any buffered block data down into the byte array before snapshotting it
        objectOut.flush();
        return sink.toByteArray();
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/IBuilder.java 0000664 0000000 0000000 00000001326 13531322035 0026621 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.util;
/**
 * A builder of {@code T} instances that can also report a size.
 *
 * @param <T> the type of object produced
 */
public interface IBuilder<T> {

    /**
     * @return the built object
     */
    T build();

    /**
     * @return a size figure for what this builder produces; presumably in bytes - confirm
     *         against the implementations
     */
    int sizeof();
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/ListNode2.java 0000664 0000000 0000000 00000002112 13531322035 0026717 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.util;
/**
 * A node of a doubly linked list: a value plus links to the neighbouring nodes. The links are
 * {@code protected} so that list classes in the same package can rewire them directly.
 *
 * @param <T> the type of the stored value
 */
public class ListNode2<T> {

    protected T value;
    protected ListNode2<T> prev;
    protected ListNode2<T> next;

    /**
     * Creates an unlinked node holding {@code value}.
     */
    public ListNode2(T value) {
        this.value = value;
    }

    /** @return the previous node, or {@code null} if none is linked */
    public ListNode2<T> getPrev() {
        return prev;
    }

    /** @return the next node, or {@code null} if none is linked */
    public ListNode2<T> getNext() {
        return next;
    }

    public T getValue() {
        return value;
    }

    public void setValue(T value) {
        this.value = value;
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/Lists.java 0000664 0000000 0000000 00000002477 13531322035 0026230 0 ustar 00root root 0000000 0000000 /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.util;
import java.util.ArrayList;
import java.util.List;
/**
* Toy version of the guava class. Only implemented here to avoid adding
* a dependency. It would be better to just depend on guava.
*/
public class Lists {

    /**
     * Copies the elements of {@code source}, in iteration order, into a new mutable list.
     *
     * @param source the elements to copy
     * @param <T>    element type of the resulting list
     * @return a new {@link ArrayList} containing the elements of {@code source}
     */
    public static <T> List<T> newArrayList(Iterable<? extends T> source) {
        List<T> r = new ArrayList<T>();
        for (T x : source) {
            r.add(x);
        }
        return r;
    }

    /**
     * @param <T> element type, inferred from the call site
     * @return a new, empty, mutable {@link ArrayList}
     */
    public static <T> List<T> newArrayList() {
        return new ArrayList<T>();
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/ObyCount.java 0000664 0000000 0000000 00000006046 13531322035 0026670 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
/**
 * Simple cardinality estimation command line utility
 *
 * Usage:
 * > obycount [update-rate]
 *
 * update-rate: output results after every update-rate elements/lines
 *
 * Example:
 * > cat elements.txt | obycount
 */
public class ObyCount {

    /**
     * Prints the usage banner to standard error and terminates the JVM
     * with exit status -1. This method never returns normally.
     */
    public static void usage() {
        // Every logical line ends with its own '\n' so the banner matches the
        // layout shown in the class comment ("Example:" used to run straight
        // into the sample command).
        System.err.println(
                "obycount [update-rate]\n"
                        + "\n"
                        + "update-rate: output results after every update-rate elements/lines\n"
                        + "\n"
                        + "Example:\n"
                        + "> cat elements.txt | obycount\n");
        System.exit(-1);
    }

    /**
     * Reads lines from standard input, offers each one to a
     * {@link HyperLogLogPlus} estimator and prints a summary at the end of
     * input. When a positive update rate is given as the first argument, a
     * summary is also printed after every {@code update-rate} lines.
     *
     * @param args optional single argument: the update rate, parsed as a long
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] args) throws IOException {
        long updateRate = -1;
        long count = 0;

        if (args.length > 0) {
            try {
                updateRate = Long.parseLong(args[0]);
            } catch (NumberFormatException e) {
                // println (not print) so the usage banner starts on its own line.
                System.err.println("Bad update rate: '" + args[0] + "' Update rate must be an integer.");
                usage();
            }
        }

        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        // NOTE(review): (14, 25) are presumably the normal and sparse
        // precisions of the estimator - confirm against HyperLogLogPlus.
        HyperLogLogPlus card = new HyperLogLogPlus(14, 25);

        String line;
        while ((line = in.readLine()) != null) {
            card.offer(line);
            count++;

            // A non-positive rate disables the intermediate reports.
            if (updateRate > 0 && count % updateRate == 0) {
                System.out.println(formatSummary(count, card.cardinality()));
            }
        }

        System.out.println(formatSummary(count, card.cardinality()));
    }

    /**
     * Renders a three-line, right-aligned table (header, dashes, values)
     * with the number of items read and the estimated cardinality. Each
     * line, including the last, ends with a newline.
     *
     * @param count       number of items read so far
     * @param cardinality estimated number of distinct items
     * @return the formatted table
     */
    protected static String formatSummary(long count, long cardinality) {
        String cntStr = Long.toString(count);
        int len = cntStr.length();
        // Columns are at least as wide as their headers (10 and 20 chars) and
        // grow with the number of digits in the item count.
        int l1 = Math.max(len, 10);
        int l2 = Math.max(len, 20);
        String fmt = "%" + l1 + "s %" + l2 + "s";

        StringBuilder sb = new StringBuilder();
        sb.append(String.format(fmt, "Item Count", "Cardinality Estimate")).append('\n');
        sb.append(String.format(fmt, TopK.string('-', l1), TopK.string('-', l2))).append('\n');
        sb.append(String.format(fmt, count, cardinality)).append('\n');
        return sb.toString();
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/Pair.java 0000664 0000000 0000000 00000003451 13531322035 0026016 0 ustar 00root root 0000000 0000000 /**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.util;
/**
 * Immutable holder for two values, with value-based {@code equals} and
 * {@code hashCode}. Either component may be {@code null}.
 *
 * @param <T1> type of the left component
 * @param <T2> type of the right component
 */
public class Pair<T1, T2> {

    /** First component; may be {@code null}. */
    public final T1 left;
    /** Second component; may be {@code null}. */
    public final T2 right;

    /**
     * @param left  first component, may be {@code null}
     * @param right second component, may be {@code null}
     */
    public Pair(T1 left, T2 right) {
        this.left = left;
        this.right = right;
    }

    /**
     * Combines the hash codes of both components; a {@code null} component
     * contributes 0.
     */
    @Override
    public final int hashCode() {
        int hashCode = 31 + (left == null ? 0 : left.hashCode());
        return 31 * hashCode + (right == null ? 0 : right.hashCode());
    }

    /**
     * Two pairs are equal when their left components are equal and their
     * right components are equal; {@code null} components only equal
     * {@code null}.
     */
    @Override
    public final boolean equals(Object o) {
        if (!(o instanceof Pair)) {
            return false;
        }
        // Wildcard cast: the component types of the other pair are unknown
        // at runtime and irrelevant for equality.
        Pair<?, ?> that = (Pair<?, ?>) o;
        // handles nulls properly
        return equal(left, that.left) && equal(right, that.right);
    }

    // From Apache Licensed guava: null-safe equality.
    private static boolean equal(Object a, Object b) {
        return a == b || (a != null && a.equals(b));
    }

    /**
     * @return the components rendered as {@code (left,right)}
     */
    @Override
    public String toString() {
        return "(" + left + "," + right + ")";
    }

    /**
     * Factory that infers the component types from its arguments.
     *
     * @param x   left component
     * @param y   right component
     * @param <X> type of the left component
     * @param <Y> type of the right component
     * @return a new pair holding {@code x} and {@code y}
     */
    public static <X, Y> Pair<X, Y> create(X x, Y y) {
        return new Pair<X, Y>(x, y);
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/Preconditions.java 0000664 0000000 0000000 00000003462 13531322035 0027745 0 ustar 00root root 0000000 0000000 /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.util;
/**
 * Minimal stand-in for the guava class of the same name, kept here only
 * to avoid the extra dependency.
 */
public class Preconditions {

    /**
     * Verifies an argument condition.
     *
     * @param condition result of the check
     * @throws IllegalArgumentException (without message) if {@code condition} is false
     */
    public static void checkArgument(boolean condition) {
        if (condition) {
            return;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Verifies an argument condition, building the failure message with
     * {@link String#format(String, Object...)}.
     *
     * @param condition result of the check
     * @param format    format string for the failure message
     * @param args      arguments referenced by {@code format}
     * @throws IllegalArgumentException if {@code condition} is false
     */
    public static void checkArgument(boolean condition, String format, Object... args) {
        if (condition) {
            return;
        }
        String message = String.format(format, args);
        throw new IllegalArgumentException(message);
    }

    /**
     * Verifies a state condition.
     *
     * @param condition result of the check
     * @throws IllegalStateException (without message) if {@code condition} is false
     */
    public static void checkState(boolean condition) {
        if (condition) {
            return;
        }
        throw new IllegalStateException();
    }

    /**
     * Verifies a state condition; the message is used verbatim, it is not
     * treated as a format string.
     *
     * @param condition result of the check
     * @param msg       message of the exception thrown on failure
     * @throws IllegalStateException if {@code condition} is false
     */
    public static void checkState(boolean condition, String msg) {
        if (condition) {
            return;
        }
        throw new IllegalStateException(msg);
    }

    /**
     * Verifies a state condition, building the failure message with
     * {@link String#format(String, Object...)}.
     *
     * @param condition result of the check
     * @param format    format string for the failure message
     * @param args      arguments referenced by {@code format}
     * @throws IllegalStateException if {@code condition} is false
     */
    public static void checkState(boolean condition, String format, Object... args) {
        if (condition) {
            return;
        }
        String message = String.format(format, args);
        throw new IllegalStateException(message);
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/TopK.java 0000664 0000000 0000000 00000011235 13531322035 0025777 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.List;
import com.clearspring.analytics.stream.Counter;
import com.clearspring.analytics.stream.StreamSummary;
/**
 * Simple TopK command line utility
 *
 * Usage:
 * > topk [capacity] [update-rate]
 *
 * capacity : size of top / k (defaults to 1000)
 * update-rate: output results after every update-rate elements/lines
 *
 * Example:
 * > cat elements.txt | topk 10
 */
public class TopK {

    /**
     * Prints the usage banner to standard error and terminates the JVM
     * with exit status -1. This method never returns normally.
     */
    public static void usage() {
        // Every logical line ends with its own '\n' so the banner matches the
        // layout shown in the class comment (the capacity and update-rate
        // descriptions used to be glued together on one line).
        System.err.println(
                "topk [capacity] [update-rate]\n"
                        + "\n"
                        + "capacity : size of top / k (defaults to 1000)\n"
                        + "update-rate: output results after every update-rate elements/lines\n"
                        + "\n"
                        + "Example:\n"
                        + "> cat elements.txt | topk 10\n");
        System.exit(-1);
    }

    /**
     * Reads lines from standard input, offers each one to a
     * {@link StreamSummary} and prints the resulting table plus the number
     * of lines read at the end of input. When a positive update rate is
     * given, the same report is also printed after every
     * {@code update-rate} lines.
     *
     * @param args optional arguments: capacity (int), then update rate (long)
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] args) throws IOException {
        long updateRate = -1;
        long count = 0;
        int capacity = 1000;

        if (args.length > 0) {
            try {
                capacity = Integer.parseInt(args[0]);
            } catch (NumberFormatException e) {
                // println (not print) so the usage banner starts on its own line.
                System.err.println("Bad capacity: '" + args[0] + "' Capacity must be an integer.");
                usage();
            }
        }

        if (args.length > 1) {
            try {
                updateRate = Long.parseLong(args[1]);
            } catch (NumberFormatException e) {
                System.err.println("Bad update rate: '" + args[1] + "' Update rate must be an integer.");
                usage();
            }
        }

        StreamSummary<String> topk = new StreamSummary<String>(capacity);
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));

        String line;
        while ((line = in.readLine()) != null) {
            topk.offer(line);
            count++;

            // A non-positive rate disables the intermediate reports.
            if (updateRate > 0 && count % updateRate == 0) {
                System.out.println(formatSummary(topk));
                System.out.println("Item count: " + count);
                System.out.println();
            }
        }

        System.out.println(formatSummary(topk));
        System.out.println("Item count: " + count);
    }

    /**
     * Renders the counters returned by {@code topk.topK(topk.getCapacity())}
     * as a right-aligned table with the columns item, count and error.
     * Column widths grow to fit the widest cell; every row, including the
     * last, ends with a newline.
     *
     * @param topk summary to render
     * @return the formatted table
     */
    public static String formatSummary(StreamSummary<String> topk) {
        StringBuilder sb = new StringBuilder();

        List<Counter<String>> counters = topk.topK(topk.getCapacity());
        String itemHeader = "item";
        String countHeader = "count";
        String errorHeader = "error";

        // Columns are never narrower than their header.
        int maxItemLen = itemHeader.length();
        int maxCountLen = countHeader.length();
        int maxErrorLen = errorHeader.length();

        for (Counter<String> counter : counters) {
            maxItemLen = Math.max(counter.getItem().length(), maxItemLen);
            maxCountLen = Math.max(Long.toString(counter.getCount()).length(), maxCountLen);
            maxErrorLen = Math.max(Long.toString(counter.getError()).length(), maxErrorLen);
        }

        // Build the two row formats once instead of once per row.
        String textRow = "%" + maxItemLen + "s %" + maxCountLen + "s %" + maxErrorLen + "s";
        String dataRow = "%" + maxItemLen + "s %" + maxCountLen + "d %" + maxErrorLen + "d";

        sb.append(String.format(textRow, itemHeader, countHeader, errorHeader));
        sb.append('\n');
        sb.append(String.format(textRow, string('-', maxItemLen), string('-', maxCountLen), string('-', maxErrorLen)));
        sb.append('\n');

        for (Counter<String> counter : counters) {
            sb.append(String.format(dataRow, counter.getItem(), counter.getCount(), counter.getError()));
            sb.append('\n');
        }

        return sb.toString();
    }

    /**
     * Builds a string consisting of {@code len} copies of {@code c}.
     *
     * @param c   character to repeat
     * @param len number of repetitions; 0 yields the empty string
     * @return the repeated character sequence
     */
    public static String string(char c, int len) {
        StringBuilder sb = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            sb.append(c);
        }
        return sb.toString();
    }
}
stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/UnsignedIntComparator.java 0000664 0000000 0000000 00000000516 13531322035 0031401 0 ustar 00root root 0000000 0000000 package com.clearspring.analytics.util;
import java.util.Comparator;
public class UnsignedIntComparator implements Comparator {
/**
 * Orders two varint-encoded byte arrays by the unsigned value of the
 * integer each one decodes to.
 *
 * @param left  varint encoding of the first value
 * @param right varint encoding of the second value
 * @return a negative number, zero or a positive number when the first
 *         value is respectively smaller than, equal to or greater than the
 *         second, both taken as unsigned 32-bit quantities
 */
@Override
public int compare(byte[] left, byte[] right) {
    int l = Varint.readUnsignedVarInt(left);
    int r = Varint.readUnsignedVarInt(right);
    // A plain "l - r" overflows for distant operands and sorts decoded
    // values with the top bit set (which are negative as Java ints) before
    // small ones; compare the bit patterns as unsigned instead.
    return Integer.compareUnsigned(l, r);
}
} stream-lib-2.9.8/src/main/java/com/clearspring/analytics/util/Varint.java 0000664 0000000 0000000 00000021320 13531322035 0026361 0 ustar 00root root 0000000 0000000 /**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.util;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* Encodes signed and unsigned values using a common variable-length
* scheme, found for example in
* <a href="http://code.google.com/apis/protocolbuffers/docs/encoding.html">
* Google's Protocol Buffers</a>. It uses fewer bytes to encode smaller values,
* but will use slightly more bytes to encode large values.
*
* Signed values are further encoded using so-called zig-zag encoding
* in order to make them "compatible" with variable-length encoding.
*/
public final class Varint {
// Private constructor: code outside this class cannot create instances.
private Varint() {
}
/**
 * Writes a signed {@code long} in the variable-length format used by
 * Google Protocol Buffers. The value is first zig-zag mapped so that
 * numbers of small magnitude, positive or negative, yield short
 * encodings. For values known to be nonnegative,
 * {@link #writeUnsignedVarLong(long, DataOutput)} should be used.
 *
 * @param value value to encode
 * @param out to write bytes to
 * @throws IOException if {@link DataOutput} throws {@link IOException}
 */
public static void writeSignedVarLong(long value, DataOutput out) throws IOException {
    // Zig-zag mapping, see
    // http://code.google.com/apis/protocolbuffers/docs/encoding.html#types
    // signFill is all ones for negative input and zero otherwise.
    final long signFill = value >> 63;
    final long zigZag = (value << 1) ^ signFill;
    writeUnsignedVarLong(zigZag, out);
}
/**
 * Writes a {@code long} in the variable-length format used by Google
 * Protocol Buffers, seven payload bits per byte, least significant group
 * first. No zig-zag mapping is applied: negative input is handled as the
 * corresponding large unsigned number. Use
 * {@link #writeSignedVarLong(long, DataOutput)} when values can be negative.
 *
 * @param value value to encode
 * @param out to write bytes to
 * @throws IOException if {@link DataOutput} throws {@link IOException}
 */
public static void writeUnsignedVarLong(long value, DataOutput out) throws IOException {
    long remaining = value;
    // While anything above the low seven bits is set, emit those seven
    // bits with the continuation flag (0x80) and move on to the next group.
    while ((remaining & ~0x7FL) != 0L) {
        out.writeByte(((int) remaining & 0x7F) | 0x80);
        remaining >>>= 7;
    }
    // Final group: continuation flag clear.
    out.writeByte((int) remaining & 0x7F);
}
/**
 * Zig-zag maps a signed {@code int} and writes the result with
 * {@link #writeUnsignedVarInt(int, DataOutput)}.
 *
 * @param value value to encode
 * @param out to write bytes to
 * @throws IOException if {@link DataOutput} throws {@link IOException}
 * @see #writeSignedVarLong(long, DataOutput)
 */
public static void writeSignedVarInt(int value, DataOutput out) throws IOException {
    // Zig-zag mapping, see
    // http://code.google.com/apis/protocolbuffers/docs/encoding.html#types
    // signFill is all ones for negative input and zero otherwise.
    final int signFill = value >> 31;
    final int zigZag = (value << 1) ^ signFill;
    writeUnsignedVarInt(zigZag, out);
}
/**
 * Writes an {@code int} in variable-length format, seven payload bits per
 * byte, least significant group first. Negative input is handled as the
 * corresponding large unsigned number.
 *
 * @param value value to encode
 * @param out to write bytes to
 * @throws IOException if {@link DataOutput} throws {@link IOException}
 * @see #writeUnsignedVarLong(long, DataOutput)
 */
public static void writeUnsignedVarInt(int value, DataOutput out) throws IOException {
    int remaining = value;
    // While anything above the low seven bits is set, emit those seven
    // bits with the continuation flag (0x80) and move on to the next group.
    while ((remaining & ~0x7F) != 0) {
        out.writeByte((remaining & 0x7F) | 0x80);
        remaining >>>= 7;
    }
    // Final group: continuation flag clear.
    out.writeByte(remaining & 0x7F);
}
/**
 * Zig-zag maps a signed {@code int} and returns the bytes produced by
 * {@link #writeUnsignedVarInt(int)} for the mapped value.
 *
 * @param value value to encode
 * @return the variable-length encoding of the zig-zag mapped value
 */
public static byte[] writeSignedVarInt(int value) {
    // Zig-zag mapping, see
    // http://code.google.com/apis/protocolbuffers/docs/encoding.html#types
    // signFill is all ones for negative input and zero otherwise.
    final int signFill = value >> 31;
    final int zigZag = (value << 1) ^ signFill;
    return writeUnsignedVarInt(zigZag);
}
/**
 * Encodes an {@code int} in variable-length format, seven payload bits
 * per byte, least significant group first, and returns the bytes.
 * Negative input is handled as the corresponding large unsigned number.
 *
 * This variant does not go through a stream: it allocates exactly one
 * object per call, the returned array, which is between one and five
 * bytes long.
 *
 * @param value value to encode
 * @return a newly allocated array holding the encoding
 * @see #writeUnsignedVarLong(long, DataOutput)
 */
public static byte[] writeUnsignedVarInt(int value) {
    // Count the seven-bit groups first so the result can be allocated at
    // its final size; no scratch buffer or copy is needed.
    int size = 1;
    for (int rest = value >>> 7; rest != 0; rest >>>= 7) {
        size++;
    }
    byte[] out = new byte[size];
    int last = size - 1;
    // Every group except the last carries the continuation flag (0x80).
    for (int i = 0; i < last; i++) {
        out[i] = (byte) ((value & 0x7F) | 0x80);
        value >>>= 7;
    }
    out[last] = (byte) (value & 0x7F);
    return out;
}
/**
 * Reads a value written by {@link #writeSignedVarLong(long, DataOutput)}:
 * decodes an unsigned variable-length {@code long} and reverses the
 * zig-zag mapping.
 *
 * @param in to read bytes from
 * @return decode value
 * @throws IOException if {@link DataInput} throws {@link IOException}
 * @throws IllegalArgumentException propagated from
 *         {@link #readUnsignedVarLong(DataInput)} when the encoded value
 *         is too long
 * @see #writeSignedVarLong(long, DataOutput)
 */
public static long readSignedVarLong(DataInput in) throws IOException {
    final long zigZag = readUnsignedVarLong(in);
    // Reverse the zig-zag mapping: the lowest bit carries the sign, the
    // remaining bits the magnitude. The unsigned shift keeps the full
    // 64-bit range intact even when the top bit of the raw value is set.
    final long signFill = -(zigZag & 1L);
    return (zigZag >>> 1) ^ signFill;
}
/**
 * Reads a variable-length {@code long}: seven payload bits per byte,
 * least significant group first, a set high bit meaning that another
 * byte follows.
 *
 * @param in to read bytes from
 * @return decode value
 * @throws IOException if {@link DataInput} throws {@link IOException}
 * @throws IllegalArgumentException if ten bytes in a row carry the
 *         continuation bit, i.e. the value does not terminate within the
 *         ten bytes a 64-bit quantity can occupy
 * @see #writeUnsignedVarLong(long, DataOutput)
 */
public static long readUnsignedVarLong(DataInput in) throws IOException {
    long result = 0L;
    int shift = 0;
    for (;;) {
        // readByte() sign-extends; the masks below only look at the low bits.
        final long next = in.readByte();
        if ((next & 0x80L) == 0) {
            // Continuation bit clear: this byte holds the top group.
            return result | (next << shift);
        }
        result |= (next & 0x7F) << shift;
        shift += 7;
        if (shift > 63) {
            throw new IllegalArgumentException("Variable length quantity is too long");
        }
    }
}
/**
 * Reads a value written by {@link #writeSignedVarInt(int, DataOutput)}:
 * decodes an unsigned variable-length {@code int} and reverses the
 * zig-zag mapping.
 *
 * @param in to read bytes from
 * @return decode value
 * @throws IllegalArgumentException propagated from
 *         {@link #readUnsignedVarInt(DataInput)} when the encoded value is
 *         too long
 * @throws IOException if {@link DataInput} throws {@link IOException}
 * @see #readSignedVarLong(DataInput)
 */
public static int readSignedVarInt(DataInput in) throws IOException {
    final int zigZag = readUnsignedVarInt(in);
    // Reverse the zig-zag mapping: the lowest bit carries the sign, the
    // remaining bits the magnitude. The unsigned shift keeps the full
    // 32-bit range intact even when the top bit of the raw value is set.
    final int signFill = -(zigZag & 1);
    return (zigZag >>> 1) ^ signFill;
}
/**
 * Reads a variable-length {@code int}: seven payload bits per byte, least
 * significant group first, a set high bit meaning that another byte
 * follows. A 32-bit quantity occupies at most five bytes.
 *
 * @param in to read bytes from
 * @return decode value
 * @throws IllegalArgumentException if variable-length value does not terminate
 *         after 5 bytes have been read
 * @throws IOException if {@link DataInput} throws {@link IOException}
 * @see #readUnsignedVarLong(DataInput)
 */
public static int readUnsignedVarInt(DataInput in) throws IOException {
    int value = 0;
    int i = 0;
    int b;
    while (((b = in.readByte()) & 0x80) != 0) {
        value |= (b & 0x7F) << i;
        i += 7;
        // After four continuation bytes the shift is 28, which is the last
        // position that still fits an int. A fifth continuation byte would
        // move it to 35, and Java reduces int shift counts modulo 32, so the
        // next group would silently land on bit 3. Reject it instead.
        if (i > 28) {
            throw new IllegalArgumentException("Variable length quantity is too long");
        }
    }
    return value | (b << i);
}
/**
 * Decodes a value produced by {@link #writeSignedVarInt(int)}: reads an
 * unsigned variable-length {@code int} from the array and reverses the
 * zig-zag mapping.
 *
 * @param bytes encoded value
 * @return decoded signed value
 * @throws IllegalArgumentException propagated from
 *         {@link #readUnsignedVarInt(byte[])} when the encoded value is
 *         too long
 */
public static int readSignedVarInt(byte[] bytes) {
    final int zigZag = readUnsignedVarInt(bytes);
    // Reverse the zig-zag mapping: the lowest bit carries the sign, the
    // remaining bits the magnitude. The unsigned shift keeps the full
    // 32-bit range intact even when the top bit of the raw value is set.
    final int signFill = -(zigZag & 1);
    return (zigZag >>> 1) ^ signFill;
}
/**
 * Decodes a variable-length {@code int} from the start of {@code bytes}:
 * seven payload bits per byte, least significant group first, a set high
 * bit meaning that another byte follows. Bytes after the terminating
 * byte are ignored.
 *
 * @param bytes encoded value; must contain a terminating byte (high bit
 *              clear) within its first five bytes
 * @return decoded value
 * @throws IllegalArgumentException if the first five bytes all carry the
 *         continuation bit, or if the array is empty or ends before a
 *         terminating byte is found
 */
public static int readUnsignedVarInt(byte[] bytes) {
    int value = 0;
    int i = 0;
    for (byte b : bytes) {
        if ((b & 0x80) == 0) {
            // Continuation bit clear: this byte holds the top group.
            return value | (b << i);
        }
        value |= (b & 0x7f) << i;
        i += 7;
        // A shift of 28 is the last position that still fits an int; Java
        // reduces int shift counts modulo 32, so a sixth byte would be
        // merged at the wrong position instead of being rejected.
        if (i > 28) {
            throw new IllegalArgumentException("Variable length quantity is too long");
        }
    }
    // Reaching this point means no terminating byte was seen. Previously the
    // sign-extended last byte (or Byte.MIN_VALUE for an empty array) was
    // merged into the result, yielding a meaningless number.
    throw new IllegalArgumentException("Variable length quantity is not terminated");
}
} stream-lib-2.9.8/src/main/java/com/clearspring/experimental/ 0000775 0000000 0000000 00000000000 13531322035 0024006 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/experimental/stream/ 0000775 0000000 0000000 00000000000 13531322035 0025301 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/experimental/stream/cardinality/ 0000775 0000000 0000000 00000000000 13531322035 0027604 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/main/java/com/clearspring/experimental/stream/cardinality/HyperBitBit.java 0000664 0000000 0000000 00000007630 13531322035 0032642 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.experimental.stream.cardinality;
import com.clearspring.analytics.hash.MurmurHash;
import com.clearspring.analytics.stream.cardinality.ICardinality;
import com.clearspring.analytics.stream.cardinality.CardinalityMergeException;
import java.io.IOException;
/**
 * Java implementation of the HyperBitBit (HBB) algorithm as presented by
 * Robert Sedgewick:
 *
 * https://www.cs.princeton.edu/~rs/talks/AC11-Cardinality.pdf
 *
 * HBB aims to beat HyperLogLog.
 * From the talk, on practical data:
 * - HyperBitBit, for N below 2^64,
 * - Uses 128 + 6 bits. (in this implementation 128 + 8)
 * - Estimates cardinality within 10% of the actual.
 *
 * The algorithm still needs some improvements.
 * - Inserting the same element twice can change the structure (unlike HLL)
 * - For small cardinalities it does not work AT ALL.
 * - The constant 5.4 used in the cardinality estimation formula should be
 * refined with feedback from real world applications
 *
 * Even so, HyperBitBit has the necessary characteristics to become
 * a better algorithm than HyperLogLog:
 * - Makes one pass through the stream.
 * - Uses a few dozen machine instructions per value
 * - Uses a few hundred bits
 * - Achieves 10% relative accuracy or better
 *
 * Any feedback to improve the algorithm in its weak points will be welcome.
 *
 */
public class HyperBitBit implements ICardinality {

    /** Current threshold compared against the per-hash value computed in {@link #offerHashed(long)}. */
    int lgN;
    /** 64-bit sketch filled for hashes whose computed value exceeds {@code lgN}. */
    long sketch;
    /** 64-bit sketch filled for hashes whose computed value exceeds {@code lgN + 1}. */
    long sketch2;

    /**
     * Create a new HyperBitBit instance with threshold 5 and empty sketches.
     *
     * Remember that it does not work well for small cardinalities!
     */
    public HyperBitBit() {
        this.lgN = 5;
        this.sketch = 0;
        this.sketch2 = 0;
    }

    /**
     * Hashes {@code o} with {@code MurmurHash.hash64} and offers the hash.
     *
     * @return the result of {@link #offerHashed(long)} for that hash
     */
    @Override
    public boolean offer(Object o) {
        return offerHashed(MurmurHash.hash64(o));
    }

    /**
     * Updates the sketches with an already hashed element.
     *
     * @param hashedLong 64-bit hash of the element
     * @return true if any of the three update branches below was taken
     */
    @Override
    public boolean offerHashed(long hashedLong) {
        // Low six bits of the hash select the bit to set. The value is
        // sign-extended, but a long shift only uses the low six bits of its
        // count, so one of the 64 sketch bits is addressed either way.
        final long slot = (hashedLong << 58) >> 58;
        final long slotBit = 1L << slot;

        // Calculate the position of the leftmost 1-bit.
        final int r = Long.numberOfLeadingZeros(hashedLong >> 6) - 6;

        boolean changed = false;
        if (r > lgN) {
            sketch |= slotBit;
            changed = true;
        }
        if (r > lgN + 1) {
            sketch2 |= slotBit;
            changed = true;
        }
        // Once more than 31 bits of the main sketch are set, promote the
        // second sketch, clear it and raise the threshold.
        if (Long.bitCount(sketch) > 31) {
            sketch = sketch2;
            sketch2 = 0;
            lgN++;
            changed = true;
        }
        return changed;
    }

    /**
     * Not supported by this estimator.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean offerHashed(int hashedInt) {
        throw new UnsupportedOperationException();
    }

    /**
     * @return 2 raised to {@code lgN + 5.4 + bitCount(sketch) / 32}, truncated to a long
     */
    @Override
    public long cardinality() {
        final double filledFraction = Long.bitCount(sketch) / 32.0;
        final double exponent = lgN + 5.4 + filledFraction;
        return (long) Math.pow(2, exponent);
    }

    /**
     * @return always 0 in this implementation
     */
    @Override
    public int sizeof() {
        return 0;
    }

    /**
     * @return always an empty array in this implementation
     */
    @Override
    public byte[] getBytes() throws IOException {
        return new byte[0];
    }

    /**
     * Merging is not implemented.
     *
     * @throws CardinalityMergeException always, as a {@link HyperBitBitMergeException}
     */
    @Override
    public ICardinality merge(ICardinality... estimators) throws CardinalityMergeException {
        throw new HyperBitBitMergeException("Cannot merge estimators of HyperBitBit class");
    }

    /** Exception thrown by {@link #merge(ICardinality...)}. */
    @SuppressWarnings("serial")
    static class HyperBitBitMergeException extends CardinalityMergeException {

        public HyperBitBitMergeException(String message) {
            super(message);
        }
    }
}
stream-lib-2.9.8/src/test/ 0000775 0000000 0000000 00000000000 13531322035 0015334 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/test/java/ 0000775 0000000 0000000 00000000000 13531322035 0016255 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/test/java/com/ 0000775 0000000 0000000 00000000000 13531322035 0017033 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/test/java/com/clearspring/ 0000775 0000000 0000000 00000000000 13531322035 0021344 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/test/java/com/clearspring/analytics/ 0000775 0000000 0000000 00000000000 13531322035 0023333 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/test/java/com/clearspring/analytics/TestUtils.java 0000664 0000000 0000000 00000001752 13531322035 0026143 0 ustar 00root root 0000000 0000000 package com.clearspring.analytics;
import java.io.*;
/**
 * Helpers for round-tripping objects through Java serialization in tests.
 */
public class TestUtils {

    /**
     * Serializes {@code obj} with an {@link ObjectOutputStream}.
     *
     * @param obj object to serialize
     * @return the serialized form
     * @throws IOException if writing the object fails
     */
    public static byte[] serialize(Serializable obj) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream(512);
        // try-with-resources closes the stream and, unlike a manual finally
        // block, keeps a write failure as the primary exception if close()
        // fails as well.
        try (ObjectOutputStream out = new ObjectOutputStream(baos)) {
            out.writeObject(obj);
        }
        // Read the buffer only after the object stream has been closed.
        return baos.toByteArray();
    }

    /**
     * Reads back one object from bytes produced by Java serialization.
     * Java deserialization can execute code chosen by whoever produced the
     * bytes, so only feed this data created by the tests themselves.
     *
     * @param bytes serialized form of a single object
     * @return the deserialized object
     * @throws ClassNotFoundException if the class of the object cannot be found
     * @throws IOException if reading the object fails
     */
    public static Object deserialize(byte[] bytes) throws ClassNotFoundException, IOException {
        try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
            return in.readObject();
        }
    }
}
stream-lib-2.9.8/src/test/java/com/clearspring/analytics/hash/ 0000775 0000000 0000000 00000000000 13531322035 0024256 5 ustar 00root root 0000000 0000000 stream-lib-2.9.8/src/test/java/com/clearspring/analytics/hash/TestLookup3Hash.java 0000664 0000000 0000000 00000007516 13531322035 0030132 0 ustar 00root root 0000000 0000000 package com.clearspring.analytics.hash;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Random;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
/**
* Tests for lookup3ycs hash functions
*
* @author yonik
*/
public class TestLookup3Hash {
// Test that the java version produces the same output as the C version
@Test
public void testEqualsLOOKUP3() {
int[] hashes = new int[]{0xc4c20dd5, 0x3ab04cc3, 0xebe874a3, 0x0e770ef3, 0xec321498, 0x73845e86, 0x8a2db728, 0x03c313bb, 0xfe5b9199, 0x95965125, 0xcbc4e7c2};
/*** the hash values were generated by adding the following to lookup3.c
*
* char* s = "hello world";
* int len = strlen(s);
* uint32_t a[len];
* for (int i=0; i