pax_global_header 0000666 0000000 0000000 00000000064 13542657257 0014532 g ustar 00root root 0000000 0000000 52 comment=4676818d7478f72f5041418f5afbb15a5080dbb7 roaring-0.4.21/ 0000775 0000000 0000000 00000000000 13542657257 0013257 5 ustar 00root root 0000000 0000000 roaring-0.4.21/.drone.yml 0000664 0000000 0000000 00000000562 13542657257 0015172 0 ustar 00root root 0000000 0000000 kind: pipeline name: default workspace: base: /go path: src/github.com/RoaringBitmap/roaring steps: - name: test image: golang commands: - go get -t - go test - go test -race -run TestConcurrent* - go build -tags appengine - go test -tags appengine - GOARCH=386 go build - GOARCH=386 go test - GOARCH=arm go build - GOARCH=arm64 go build roaring-0.4.21/.gitignore 0000664 0000000 0000000 00000000131 13542657257 0015242 0 ustar 00root root 0000000 0000000 *~ roaring-fuzz.zip workdir coverage.out testdata/all3.classic testdata/all3.msgp.snappy roaring-0.4.21/.gitmodules 0000664 0000000 0000000 00000000000 13542657257 0015422 0 ustar 00root root 0000000 0000000 roaring-0.4.21/.travis.yml 0000664 0000000 0000000 00000001345 13542657257 0015373 0 ustar 00root root 0000000 0000000 language: go sudo: false install: - go get -t github.com/RoaringBitmap/roaring - go get -t golang.org/x/tools/cmd/cover - go get -t github.com/mattn/goveralls - go get -t github.com/mschoch/smat notifications: email: false go: - "1.7.x" - "1.8.x" - "1.9.x" - "1.10.x" - "1.11.x" - "1.12.x" - tip # whitelist branches: only: - master script: - goveralls -v -service travis-ci -ignore arraycontainer_gen.go,bitmapcontainer_gen.go,rle16_gen.go,rle_gen.go,roaringarray_gen.go,rle.go || go test - go test -race -run TestConcurrent* - go build -tags appengine - go test -tags appengine - GOARCH=arm64 go build - GOARCH=386 go build - GOARCH=386 go test - GOARCH=arm go build - GOARCH=arm64 go build matrix: allow_failures: - go: tip roaring-0.4.21/AUTHORS 0000664 0000000 0000000 00000000434 13542657257 0014330 0 ustar 00root root 
0000000 0000000 # This is the official list of roaring authors for copyright purposes. Todd Gruben (@tgruben), Daniel Lemire (@lemire), Elliot Murphy (@statik), Bob Potter (@bpot), Tyson Maly (@tvmaly), Will Glynn (@willglynn), Brent Pedersen (@brentp) Maciej Biłas (@maciej), Joe Nall (@joenall) roaring-0.4.21/CONTRIBUTORS 0000664 0000000 0000000 00000000606 13542657257 0015141 0 ustar 00root root 0000000 0000000 # This is the official list of roaring contributors Todd Gruben (@tgruben), Daniel Lemire (@lemire), Elliot Murphy (@statik), Bob Potter (@bpot), Tyson Maly (@tvmaly), Will Glynn (@willglynn), Brent Pedersen (@brentp), Jason E. Aten (@glycerine), Vali Malinoiu (@0x4139), Forud Ghafouri (@fzerorubigd), Joe Nall (@joenall), (@fredim), Edd Robinson (@e-dard), Alexander Petrov (@alldroll) roaring-0.4.21/LICENSE 0000664 0000000 0000000 00000031341 13542657257 0014266 0 ustar 00root root 0000000 0000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2016 by the authors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================================================ Portions of runcontainer.go are from the Go standard library, which is licensed under: Copyright (c) 2009 The Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. roaring-0.4.21/LICENSE-2.0.txt 0000664 0000000 0000000 00000026121 13542657257 0015401 0 ustar 00root root 0000000 0000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2016 by the authors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. roaring-0.4.21/Makefile 0000664 0000000 0000000 00000006114 13542657257 0014721 0 ustar 00root root 0000000 0000000 .PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets # Display general help about this command help: @echo "" @echo "The following commands are available:" @echo "" @echo " make qa : Run all the tests" @echo " make test : Run the unit tests" @echo "" @echo " make format : Format the source code" @echo " make fmtcheck : Check if the source code has been formatted" @echo " make vet : Check for suspicious constructs" @echo " make lint : Check for style errors" @echo "" @echo " make deps : Get the dependencies" @echo " make clean : Remove any build artifact" @echo " make nuke : Deletes any intermediate file" @echo "" @echo " make fuzz-smat : Fuzzy testing with smat" @echo " make fuzz-stream : Fuzzy testing with stream deserialization" @echo " make fuzz-buffer : Fuzzy testing with buffer deserialization" @echo "" # Alias for help target all: help test: go test go test -race -run TestConcurrent* # Format the source code format: @find ./ -type f -name "*.go" -exec gofmt -w {} \; # Check if the source code has been formatted fmtcheck: @mkdir -p target @find ./ -type f -name "*.go" -exec gofmt -d {} \; | tee target/format.diff @test ! 
-s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; } # Check for syntax errors vet: GOPATH=$(GOPATH) go vet ./... # Check for style errors lint: GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint ./... # Alias to run all quality-assurance checks qa: fmtcheck test vet lint # --- INSTALL --- # Get the dependencies deps: GOPATH=$(GOPATH) go get github.com/stretchr/testify GOPATH=$(GOPATH) go get github.com/willf/bitset GOPATH=$(GOPATH) go get github.com/golang/lint/golint GOPATH=$(GOPATH) go get github.com/mschoch/smat GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz-build GOPATH=$(GOPATH) go get github.com/glycerine/go-unsnap-stream GOPATH=$(GOPATH) go get github.com/philhofer/fwd GOPATH=$(GOPATH) go get github.com/jtolds/gls fuzz-smat: go test -tags=gofuzz -run=TestGenerateSmatCorpus go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 fuzz-stream: go-fuzz-build -func FuzzSerializationStream github.com/RoaringBitmap/roaring go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 fuzz-buffer: go-fuzz-build -func FuzzSerializationBuffer github.com/RoaringBitmap/roaring go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 # Remove any build artifact clean: GOPATH=$(GOPATH) go clean ./... # Deletes any intermediate file nuke: rm -rf ./target GOPATH=$(GOPATH) go clean -i ./... 
ser: go generate cover: go test -coverprofile=coverage.out go tool cover -html=coverage.out fetch-real-roaring-datasets: # pull github.com/RoaringBitmap/real-roaring-datasets -> testdata/real-roaring-datasets git submodule init git submodule update roaring-0.4.21/README.md 0000664 0000000 0000000 00000020576 13542657257 0014550 0 ustar 00root root 0000000 0000000 roaring [](https://travis-ci.org/RoaringBitmap/roaring) [](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [](https://godoc.org/github.com/RoaringBitmap/roaring) [](https://goreportcard.com/report/github.com/RoaringBitmap/roaring) [](https://cloud.drone.io/RoaringBitmap/roaring) ============= This is a go version of the Roaring bitmap data structure. Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and [Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. [lucene]: https://lucene.apache.org/ [solr]: https://lucene.apache.org/solr/ [elasticsearch]: https://www.elastic.co/products/elasticsearch [druid]: https://druid.apache.org/ [spark]: https://spark.apache.org/ [opensearchserver]: http://www.opensearchserver.com [cloudtorrent]: https://github.com/jpillora/cloud-torrent [whoosh]: https://bitbucket.org/mchaput/whoosh/wiki/Home [pilosa]: https://www.pilosa.com/ [kylin]: http://kylin.apache.org/ [pinot]: http://github.com/linkedin/pinot/wiki [vsts]: https://www.visualstudio.com/team-services/ [atlas]: https://github.com/Netflix/atlas Roaring bitmaps are found to work well in many important applications: > Use Roaring for bitmap compression whenever possible. 
Do not use other bitmap compression methods ([Wang et al., SIGMOD 2017](http://db.ucsd.edu/wp-content/uploads/2017/03/sidm338-wangA.pdf)) The ``roaring`` Go library is used by * [Cloud Torrent](https://github.com/jpillora/cloud-torrent): a self-hosted remote torrent client * [runv](https://github.com/hyperhq/runv): an Hypervisor-based runtime for the Open Containers Initiative * [InfluxDB](https://www.influxdata.com) * [Pilosa](https://www.pilosa.com/) * [Bleve](http://www.blevesearch.com) This library is used in production in several systems, it is part of the [Awesome Go collection](https://awesome-go.com). There are also [Java](https://github.com/RoaringBitmap/RoaringBitmap) and [C/C++](https://github.com/RoaringBitmap/CRoaring) versions. The Java, C, C++ and Go version are binary compatible: e.g, you can save bitmaps from a Java program and load them back in Go, and vice versa. We have a [format specification](https://github.com/RoaringBitmap/RoaringFormatSpec). This code is licensed under Apache License, Version 2.0 (ASL2.0). Copyright 2016-... by the authors. ### References - Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience 48 (4), 2018 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821) - Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin, Better bitmap performance with Roaring bitmaps, Software: Practice and Experience 46 (5), 2016. http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html - Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. http://arxiv.org/abs/1603.06549 ### Dependencies Dependencies are fetched automatically by giving the `-t` flag to `go get`. 
they include - github.com/willf/bitset - github.com/mschoch/smat - github.com/glycerine/go-unsnap-stream - github.com/philhofer/fwd - github.com/jtolds/gls Note that the smat library requires Go 1.6 or better. #### Installation - go get -t github.com/RoaringBitmap/roaring ### Example Here is a simplified but complete example: ```go package main import ( "fmt" "github.com/RoaringBitmap/roaring" "bytes" ) func main() { // example inspired by https://github.com/fzandona/goroar fmt.Println("==roaring==") rb1 := roaring.BitmapOf(1, 2, 3, 4, 5, 100, 1000) fmt.Println(rb1.String()) rb2 := roaring.BitmapOf(3, 4, 1000) fmt.Println(rb2.String()) rb3 := roaring.New() fmt.Println(rb3.String()) fmt.Println("Cardinality: ", rb1.GetCardinality()) fmt.Println("Contains 3? ", rb1.Contains(3)) rb1.And(rb2) rb3.Add(1) rb3.Add(5) rb3.Or(rb1) // computes union of the three bitmaps in parallel using 4 workers roaring.ParOr(4, rb1, rb2, rb3) // computes intersection of the three bitmaps in parallel using 4 workers roaring.ParAnd(4, rb1, rb2, rb3) // prints 1, 3, 4, 5, 1000 i := rb3.Iterator() for i.HasNext() { fmt.Println(i.Next()) } fmt.Println() // next we include an example of serialization buf := new(bytes.Buffer) rb1.WriteTo(buf) // we omit error handling newrb:= roaring.New() newrb.ReadFrom(buf) if rb1.Equals(newrb) { fmt.Println("I wrote the content to a byte stream and read it back.") } // you can iterate over bitmaps using ReverseIterator(), Iterator, ManyIterator() } ``` If you wish to use serialization and handle errors, you might want to consider the following sample of code: ```go rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000) buf := new(bytes.Buffer) size,err:=rb.WriteTo(buf) if err != nil { t.Errorf("Failed writing") } newrb:= New() size,err=newrb.ReadFrom(buf) if err != nil { t.Errorf("Failed reading") } if ! 
rb.Equals(newrb) { t.Errorf("Cannot retrieve serialized version") } ``` Given N integers in [0,x), then the serialized size in bytes of a Roaring bitmap should never exceed this bound: `` 8 + 9 * ((long)x+65535)/65536 + 2 * N `` That is, given a fixed overhead for the universe size (x), Roaring bitmaps never use more than 2 bytes per integer. You can call ``BoundSerializedSizeInBytes`` for a more precise estimate. ### Documentation Current documentation is available at http://godoc.org/github.com/RoaringBitmap/roaring ### Goroutine safety In general, it should not generally be considered safe to access the same bitmaps using different goroutines--they are left unsynchronized for performance. Should you want to access a Bitmap from more than one goroutine, you should provide synchronization. Typically this is done by using channels to pass the *Bitmap around (in Go style; so there is only ever one owner), or by using `sync.Mutex` to serialize operations on Bitmaps. ### Coverage We test our software. For a report on our test coverage, see https://coveralls.io/github/RoaringBitmap/roaring?branch=master ### Benchmark Type go test -bench Benchmark -run - To run benchmarks on [Real Roaring Datasets](https://github.com/RoaringBitmap/real-roaring-datasets) run the following: ```sh go get github.com/RoaringBitmap/real-roaring-datasets BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run - ``` ### Iterative use You can use roaring with gore: - go get -u github.com/motemen/gore - Make sure that ``$GOPATH/bin`` is in your ``$PATH``. 
- go get github.com/RoaringBitmap/roaring ```go $ gore gore version 0.2.6 :help for help gore> :import github.com/RoaringBitmap/roaring gore> x:=roaring.New() gore> x.Add(1) gore> x.String() "{1}" ``` ### Fuzzy testing You can help us test further the library with fuzzy testing: go get github.com/dvyukov/go-fuzz/go-fuzz go get github.com/dvyukov/go-fuzz/go-fuzz-build go test -tags=gofuzz -run=TestGenerateSmatCorpus go-fuzz-build github.com/RoaringBitmap/roaring go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 Let it run, and if the # of crashers is > 0, check out the reports in the workdir where you should be able to find the panic goroutine stack traces. ### Alternative in Go There is a Go version wrapping the C/C++ implementation https://github.com/RoaringBitmap/gocroaring For an alternative implementation in Go, see https://github.com/fzandona/goroar The two versions were written independently. ### Mailing list/discussion group https://groups.google.com/forum/#!forum/roaring-bitmaps roaring-0.4.21/aggregation_test.go 0000664 0000000 0000000 00000013510 13542657257 0017134 0 ustar 00root root 0000000 0000000 package roaring // to run just these tests: go test -run TestParAggregations import ( "fmt" "github.com/stretchr/testify/assert" "testing" ) func testAggregations(t *testing.T, and func(bitmaps ...*Bitmap) *Bitmap, or func(bitmaps ...*Bitmap) *Bitmap, xor func(bitmaps ...*Bitmap) *Bitmap) { t.Run("simple case", func(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb1.Add(1) rb2.Add(2) assertAggregation(t, 0, and, rb1, rb2) assertAggregation(t, 2, or, rb1, rb2) assertAggregation(t, 2, xor, rb1, rb2) }) t.Run("aggregate nothing", func(t *testing.T) { assertAggregation(t, 0, and) assertAggregation(t, 0, or) assertAggregation(t, 0, xor) }) t.Run("single bitmap", func(t *testing.T) { rb := BitmapOf(1, 2, 3) assertAggregation(t, 3, and, rb) assertAggregation(t, 3, or, rb) assertAggregation(t, 3, xor, rb) }) t.Run("empty and single elem bitmaps", 
func(t *testing.T) { rb1 := NewBitmap() rb2 := BitmapOf(1) assertAggregation(t, 0, and, rb1, rb2) assertAggregation(t, 1, or, rb1, rb2) assertAggregation(t, 1, xor, rb1, rb2) }) t.Run("two single elem disjoint sets", func(t *testing.T) { rb1 := BitmapOf(1) rb2 := BitmapOf(2) assertAggregation(t, 0, and, rb1, rb2) assertAggregation(t, 2, or, rb1, rb2) }) t.Run("3 bitmaps with CoW set (not in order of definition)", func(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb3 := NewBitmap() rb1.SetCopyOnWrite(true) rb2.SetCopyOnWrite(true) rb3.SetCopyOnWrite(true) rb1.Add(1) rb1.Add(100000) rb2.Add(200000) rb3.Add(1) rb3.Add(300000) assertAggregation(t, 0, and, rb2, rb1, rb3) assertAggregation(t, 4, or, rb2, rb1, rb3) assertAggregation(t, 3, xor, rb2, rb1, rb3) }) t.Run("3 bitmaps (not in order of definition)", func(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb3 := NewBitmap() rb1.Add(1) rb1.Add(100000) rb2.Add(200000) rb3.Add(1) rb3.Add(300000) assertAggregation(t, 0, and, rb2, rb1, rb3) assertAggregation(t, 4, or, rb2, rb1, rb3) assertAggregation(t, 3, xor, rb2, rb1, rb3) }) t.Run("3 bitmaps", func(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb3 := NewBitmap() rb1.Add(1) rb1.Add(100000) rb2.Add(200000) rb3.Add(1) rb3.Add(300000) assertAggregation(t, 0, and, rb1, rb2, rb3) assertAggregation(t, 4, or, rb1, rb2, rb3) assertAggregation(t, 3, xor, rb1, rb2, rb3) }) t.Run("3 bitmaps with CoW set", func(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb3 := NewBitmap() rb1.SetCopyOnWrite(true) rb2.SetCopyOnWrite(true) rb3.SetCopyOnWrite(true) rb1.Add(1) rb1.Add(100000) rb2.Add(200000) rb3.Add(1) rb3.Add(300000) assertAggregation(t, 0, and, rb1, rb2, rb3) assertAggregation(t, 4, or, rb1, rb2, rb3) assertAggregation(t, 3, xor, rb1, rb2, rb3) }) t.Run("advanced case", func(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb3 := NewBitmap() for i := uint32(0); i < 1000000; i += 3 { rb1.Add(i) } for i := uint32(0); i < 1000000; i += 7 { 
rb2.Add(i) } for i := uint32(0); i < 1000000; i += 1001 { rb3.Add(i) } for i := uint32(1000000); i < 2000000; i += 1001 { rb1.Add(i) } for i := uint32(1000000); i < 2000000; i += 3 { rb2.Add(i) } for i := uint32(1000000); i < 2000000; i += 7 { rb3.Add(i) } rb1.Or(rb2) rb1.Or(rb3) bigand := And(And(rb1, rb2), rb3) bigxor := Xor(Xor(rb1, rb2), rb3) if or != nil { assert.True(t, or(rb1, rb2, rb3).Equals(rb1)) } if and != nil { assert.True(t, and(rb1, rb2, rb3).Equals(bigand)) } if xor != nil { assert.True(t, xor(rb1, rb2, rb3).Equals(bigxor)) } }) t.Run("advanced case with runs", func(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb3 := NewBitmap() for i := uint32(500); i < 75000; i++ { rb1.Add(i) } for i := uint32(0); i < 1000000; i += 7 { rb2.Add(i) } for i := uint32(0); i < 1000000; i += 1001 { rb3.Add(i) } for i := uint32(1000000); i < 2000000; i += 1001 { rb1.Add(i) } for i := uint32(1000000); i < 2000000; i += 3 { rb2.Add(i) } for i := uint32(1000000); i < 2000000; i += 7 { rb3.Add(i) } rb1.RunOptimize() rb1.Or(rb2) rb1.Or(rb3) bigand := And(And(rb1, rb2), rb3) bigxor := Xor(Xor(rb1, rb2), rb3) if or != nil { assert.True(t, or(rb1, rb2, rb3).Equals(rb1)) } if and != nil { assert.True(t, and(rb1, rb2, rb3).Equals(bigand)) } if xor != nil { assert.True(t, xor(rb1, rb2, rb3).Equals(bigxor)) } }) t.Run("issue 178", func(t *testing.T) { ba1 := []uint32{3585028, 65901253, 143441994, 211160474, 286511937, 356744840, 434332509, 502812785, 576097614, 646557334, 714794241, 775083485, 833704249, 889329147, 941367043} ba2 := []uint32{17883, 54494426, 113908938, 174519827, 235465665, 296685741, 357644666, 420192495, 476104304, 523046142, 577855081, 634889665, 692460635, 751350463, 809989192, 863494316, 919127240} r1 := BitmapOf(ba1...) r2 := BitmapOf(ba2...) 
assertAggregation(t, 32, or, r1, r2) }) } func assertAggregation(t *testing.T, expected uint64, aggr func(bitmaps ...*Bitmap) *Bitmap, bitmaps ...*Bitmap) { if aggr != nil { assert.Equal(t, aggr(bitmaps...).GetCardinality(), expected) } } func TestParAggregations(t *testing.T) { for _, p := range [...]int{1, 2, 4} { andFunc := func(bitmaps ...*Bitmap) *Bitmap { return ParAnd(p, bitmaps...) } orFunc := func(bitmaps ...*Bitmap) *Bitmap { return ParOr(p, bitmaps...) } t.Run(fmt.Sprintf("par%d", p), func(t *testing.T) { testAggregations(t, andFunc, orFunc, nil) }) } } func TestParHeapAggregations(t *testing.T) { orFunc := func(bitmaps ...*Bitmap) *Bitmap { return ParHeapOr(0, bitmaps...) } testAggregations(t, nil, orFunc, nil) } func TestFastAggregations(t *testing.T) { testAggregations(t, FastAnd, FastOr, nil) } func TestHeapAggregations(t *testing.T) { testAggregations(t, nil, HeapOr, HeapXor) } roaring-0.4.21/arraycontainer.go 0000664 0000000 0000000 00000060406 13542657257 0016635 0 ustar 00root root 0000000 0000000 package roaring import ( "fmt" ) //go:generate msgp -unexported type arrayContainer struct { content []uint16 } func (ac *arrayContainer) String() string { s := "{" for it := ac.getShortIterator(); it.hasNext(); { s += fmt.Sprintf("%v, ", it.next()) } return s + "}" } func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uint32) { for k := 0; k < len(ac.content); k++ { x[k+i] = uint32(ac.content[k]) | mask } } func (ac *arrayContainer) getShortIterator() shortPeekable { return &shortIterator{ac.content, 0} } func (ac *arrayContainer) getReverseIterator() shortIterable { return &reverseIterator{ac.content, len(ac.content) - 1} } func (ac *arrayContainer) getManyIterator() manyIterable { return &shortIterator{ac.content, 0} } func (ac *arrayContainer) minimum() uint16 { return ac.content[0] // assume not empty } func (ac *arrayContainer) maximum() uint16 { return ac.content[len(ac.content)-1] // assume not empty } func (ac 
*arrayContainer) getSizeInBytes() int { return ac.getCardinality() * 2 } func (ac *arrayContainer) serializedSizeInBytes() int { return ac.getCardinality() * 2 } func arrayContainerSizeInBytes(card int) int { return card * 2 } // add the values in the range [firstOfRange,endx) func (ac *arrayContainer) iaddRange(firstOfRange, endx int) container { if firstOfRange >= endx { return ac } indexstart := binarySearch(ac.content, uint16(firstOfRange)) if indexstart < 0 { indexstart = -indexstart - 1 } indexend := binarySearch(ac.content, uint16(endx-1)) if indexend < 0 { indexend = -indexend - 1 } else { indexend++ } rangelength := endx - firstOfRange newcardinality := indexstart + (ac.getCardinality() - indexend) + rangelength if newcardinality > arrayDefaultMaxSize { a := ac.toBitmapContainer() return a.iaddRange(firstOfRange, endx) } if cap(ac.content) < newcardinality { tmp := make([]uint16, newcardinality, newcardinality) copy(tmp[:indexstart], ac.content[:indexstart]) copy(tmp[indexstart+rangelength:], ac.content[indexend:]) ac.content = tmp } else { ac.content = ac.content[:newcardinality] copy(ac.content[indexstart+rangelength:], ac.content[indexend:]) } for k := 0; k < rangelength; k++ { ac.content[k+indexstart] = uint16(firstOfRange + k) } return ac } // remove the values in the range [firstOfRange,endx) func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container { if firstOfRange >= endx { return ac } indexstart := binarySearch(ac.content, uint16(firstOfRange)) if indexstart < 0 { indexstart = -indexstart - 1 } indexend := binarySearch(ac.content, uint16(endx-1)) if indexend < 0 { indexend = -indexend - 1 } else { indexend++ } rangelength := indexend - indexstart answer := ac copy(answer.content[indexstart:], ac.content[indexstart+rangelength:]) answer.content = answer.content[:ac.getCardinality()-rangelength] return answer } // flip the values in the range [firstOfRange,endx) func (ac *arrayContainer) not(firstOfRange, endx int) container { if 
firstOfRange >= endx { return ac.clone() } return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1] } // flip the values in the range [firstOfRange,lastOfRange] func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container { if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] return ac.clone() } // determine the span of array indices to be affected^M startIndex := binarySearch(ac.content, uint16(firstOfRange)) if startIndex < 0 { startIndex = -startIndex - 1 } lastIndex := binarySearch(ac.content, uint16(lastOfRange)) if lastIndex < 0 { lastIndex = -lastIndex - 2 } currentValuesInRange := lastIndex - startIndex + 1 spanToBeFlipped := lastOfRange - firstOfRange + 1 newValuesInRange := spanToBeFlipped - currentValuesInRange cardinalityChange := newValuesInRange - currentValuesInRange newCardinality := len(ac.content) + cardinalityChange if newCardinality > arrayDefaultMaxSize { return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1) } answer := newArrayContainer() answer.content = make([]uint16, newCardinality, newCardinality) //a hack for sure copy(answer.content, ac.content[:startIndex]) outPos := startIndex inPos := startIndex valInRange := firstOfRange for ; valInRange <= lastOfRange && inPos <= lastIndex; valInRange++ { if uint16(valInRange) != ac.content[inPos] { answer.content[outPos] = uint16(valInRange) outPos++ } else { inPos++ } } for ; valInRange <= lastOfRange; valInRange++ { answer.content[outPos] = uint16(valInRange) outPos++ } for i := lastIndex + 1; i < len(ac.content); i++ { answer.content[outPos] = ac.content[i] outPos++ } answer.content = answer.content[:newCardinality] return answer } func (ac *arrayContainer) equals(o container) bool { srb, ok := o.(*arrayContainer) if ok { // Check if the containers are the same object. 
if ac == srb { return true } if len(srb.content) != len(ac.content) { return false } for i, v := range ac.content { if v != srb.content[i] { return false } } return true } // use generic comparison bCard := o.getCardinality() aCard := ac.getCardinality() if bCard != aCard { return false } ait := ac.getShortIterator() bit := o.getShortIterator() for ait.hasNext() { if bit.next() != ait.next() { return false } } return true } func (ac *arrayContainer) toBitmapContainer() *bitmapContainer { bc := newBitmapContainer() bc.loadData(ac) return bc } func (ac *arrayContainer) iadd(x uint16) (wasNew bool) { // Special case adding to the end of the container. l := len(ac.content) if l > 0 && l < arrayDefaultMaxSize && ac.content[l-1] < x { ac.content = append(ac.content, x) return true } loc := binarySearch(ac.content, x) if loc < 0 { s := ac.content i := -loc - 1 s = append(s, 0) copy(s[i+1:], s[i:]) s[i] = x ac.content = s return true } return false } func (ac *arrayContainer) iaddReturnMinimized(x uint16) container { // Special case adding to the end of the container. l := len(ac.content) if l > 0 && l < arrayDefaultMaxSize && ac.content[l-1] < x { ac.content = append(ac.content, x) return ac } loc := binarySearch(ac.content, x) if loc < 0 { if len(ac.content) >= arrayDefaultMaxSize { a := ac.toBitmapContainer() a.iadd(x) return a } s := ac.content i := -loc - 1 s = append(s, 0) copy(s[i+1:], s[i:]) s[i] = x ac.content = s } return ac } // iremoveReturnMinimized is allowed to change the return type to minimize storage. func (ac *arrayContainer) iremoveReturnMinimized(x uint16) container { ac.iremove(x) return ac } func (ac *arrayContainer) iremove(x uint16) bool { loc := binarySearch(ac.content, x) if loc >= 0 { s := ac.content s = append(s[:loc], s[loc+1:]...) 
ac.content = s return true } return false } func (ac *arrayContainer) remove(x uint16) container { out := &arrayContainer{make([]uint16, len(ac.content))} copy(out.content, ac.content[:]) loc := binarySearch(out.content, x) if loc >= 0 { s := out.content s = append(s[:loc], s[loc+1:]...) out.content = s } return out } func (ac *arrayContainer) or(a container) container { switch x := a.(type) { case *arrayContainer: return ac.orArray(x) case *bitmapContainer: return x.orArray(ac) case *runContainer16: if x.isFull() { return x.clone() } return x.orArray(ac) } panic("unsupported container type") } func (ac *arrayContainer) orCardinality(a container) int { switch x := a.(type) { case *arrayContainer: return ac.orArrayCardinality(x) case *bitmapContainer: return x.orArrayCardinality(ac) case *runContainer16: return x.orArrayCardinality(ac) } panic("unsupported container type") } func (ac *arrayContainer) ior(a container) container { switch x := a.(type) { case *arrayContainer: return ac.iorArray(x) case *bitmapContainer: return a.(*bitmapContainer).orArray(ac) //return ac.iorBitmap(x) // note: this does not make sense case *runContainer16: if x.isFull() { return x.clone() } return ac.iorRun16(x) } panic("unsupported container type") } func (ac *arrayContainer) iorArray(value2 *arrayContainer) container { value1 := ac len1 := value1.getCardinality() len2 := value2.getCardinality() maxPossibleCardinality := len1 + len2 if maxPossibleCardinality > arrayDefaultMaxSize { // it could be a bitmap! 
bc := newBitmapContainer() for k := 0; k < len(value2.content); k++ { v := value2.content[k] i := uint(v) >> 6 mask := uint64(1) << (v % 64) bc.bitmap[i] |= mask } for k := 0; k < len(ac.content); k++ { v := ac.content[k] i := uint(v) >> 6 mask := uint64(1) << (v % 64) bc.bitmap[i] |= mask } bc.cardinality = int(popcntSlice(bc.bitmap)) if bc.cardinality <= arrayDefaultMaxSize { return bc.toArrayContainer() } return bc } if maxPossibleCardinality > cap(value1.content) { newcontent := make([]uint16, 0, maxPossibleCardinality) copy(newcontent[len2:maxPossibleCardinality], ac.content[0:len1]) ac.content = newcontent } else { copy(ac.content[len2:maxPossibleCardinality], ac.content[0:len1]) } nl := union2by2(value1.content[len2:maxPossibleCardinality], value2.content, ac.content) ac.content = ac.content[:nl] // reslice to match actual used capacity return ac } // Note: such code does not make practical sense, except for lazy evaluations func (ac *arrayContainer) iorBitmap(bc2 *bitmapContainer) container { bc1 := ac.toBitmapContainer() bc1.iorBitmap(bc2) *ac = *newArrayContainerFromBitmap(bc1) return ac } func (ac *arrayContainer) iorRun16(rc *runContainer16) container { bc1 := ac.toBitmapContainer() bc2 := rc.toBitmapContainer() bc1.iorBitmap(bc2) *ac = *newArrayContainerFromBitmap(bc1) return ac } func (ac *arrayContainer) lazyIOR(a container) container { switch x := a.(type) { case *arrayContainer: return ac.lazyIorArray(x) case *bitmapContainer: return ac.lazyIorBitmap(x) case *runContainer16: if x.isFull() { return x.clone() } return ac.lazyIorRun16(x) } panic("unsupported container type") } func (ac *arrayContainer) lazyIorArray(ac2 *arrayContainer) container { // TODO actually make this lazy return ac.iorArray(ac2) } func (ac *arrayContainer) lazyIorBitmap(bc *bitmapContainer) container { // TODO actually make this lazy return ac.iorBitmap(bc) } func (ac *arrayContainer) lazyIorRun16(rc *runContainer16) container { // TODO actually make this lazy return 
ac.iorRun16(rc) } func (ac *arrayContainer) lazyOR(a container) container { switch x := a.(type) { case *arrayContainer: return ac.lazyorArray(x) case *bitmapContainer: return a.lazyOR(ac) case *runContainer16: if x.isFull() { return x.clone() } return x.orArray(ac) } panic("unsupported container type") } func (ac *arrayContainer) orArray(value2 *arrayContainer) container { value1 := ac maxPossibleCardinality := value1.getCardinality() + value2.getCardinality() if maxPossibleCardinality > arrayDefaultMaxSize { // it could be a bitmap! bc := newBitmapContainer() for k := 0; k < len(value2.content); k++ { v := value2.content[k] i := uint(v) >> 6 mask := uint64(1) << (v % 64) bc.bitmap[i] |= mask } for k := 0; k < len(ac.content); k++ { v := ac.content[k] i := uint(v) >> 6 mask := uint64(1) << (v % 64) bc.bitmap[i] |= mask } bc.cardinality = int(popcntSlice(bc.bitmap)) if bc.cardinality <= arrayDefaultMaxSize { return bc.toArrayContainer() } return bc } answer := newArrayContainerCapacity(maxPossibleCardinality) nl := union2by2(value1.content, value2.content, answer.content) answer.content = answer.content[:nl] // reslice to match actual used capacity return answer } func (ac *arrayContainer) orArrayCardinality(value2 *arrayContainer) int { return union2by2Cardinality(ac.content, value2.content) } func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container { value1 := ac maxPossibleCardinality := value1.getCardinality() + value2.getCardinality() if maxPossibleCardinality > arrayLazyLowerBound { // it could be a bitmap!^M bc := newBitmapContainer() for k := 0; k < len(value2.content); k++ { v := value2.content[k] i := uint(v) >> 6 mask := uint64(1) << (v % 64) bc.bitmap[i] |= mask } for k := 0; k < len(ac.content); k++ { v := ac.content[k] i := uint(v) >> 6 mask := uint64(1) << (v % 64) bc.bitmap[i] |= mask } bc.cardinality = invalidCardinality return bc } answer := newArrayContainerCapacity(maxPossibleCardinality) nl := union2by2(value1.content, 
value2.content, answer.content) answer.content = answer.content[:nl] // reslice to match actual used capacity return answer } func (ac *arrayContainer) and(a container) container { switch x := a.(type) { case *arrayContainer: return ac.andArray(x) case *bitmapContainer: return x.and(ac) case *runContainer16: if x.isFull() { return ac.clone() } return x.andArray(ac) } panic("unsupported container type") } func (ac *arrayContainer) andCardinality(a container) int { switch x := a.(type) { case *arrayContainer: return ac.andArrayCardinality(x) case *bitmapContainer: return x.andCardinality(ac) case *runContainer16: return x.andArrayCardinality(ac) } panic("unsupported container type") } func (ac *arrayContainer) intersects(a container) bool { switch x := a.(type) { case *arrayContainer: return ac.intersectsArray(x) case *bitmapContainer: return x.intersects(ac) case *runContainer16: return x.intersects(ac) } panic("unsupported container type") } func (ac *arrayContainer) iand(a container) container { switch x := a.(type) { case *arrayContainer: return ac.iandArray(x) case *bitmapContainer: return ac.iandBitmap(x) case *runContainer16: if x.isFull() { return ac } return x.andArray(ac) } panic("unsupported container type") } func (ac *arrayContainer) iandBitmap(bc *bitmapContainer) container { pos := 0 c := ac.getCardinality() for k := 0; k < c; k++ { // branchless v := ac.content[k] ac.content[pos] = v pos += int(bc.bitValue(v)) } ac.content = ac.content[:pos] return ac } func (ac *arrayContainer) xor(a container) container { switch x := a.(type) { case *arrayContainer: return ac.xorArray(x) case *bitmapContainer: return a.xor(ac) case *runContainer16: return x.xorArray(ac) } panic("unsupported container type") } func (ac *arrayContainer) xorArray(value2 *arrayContainer) container { value1 := ac totalCardinality := value1.getCardinality() + value2.getCardinality() if totalCardinality > arrayDefaultMaxSize { // it could be a bitmap! 
bc := newBitmapContainer() for k := 0; k < len(value2.content); k++ { v := value2.content[k] i := uint(v) >> 6 bc.bitmap[i] ^= (uint64(1) << (v % 64)) } for k := 0; k < len(ac.content); k++ { v := ac.content[k] i := uint(v) >> 6 bc.bitmap[i] ^= (uint64(1) << (v % 64)) } bc.computeCardinality() if bc.cardinality <= arrayDefaultMaxSize { return bc.toArrayContainer() } return bc } desiredCapacity := totalCardinality answer := newArrayContainerCapacity(desiredCapacity) length := exclusiveUnion2by2(value1.content, value2.content, answer.content) answer.content = answer.content[:length] return answer } func (ac *arrayContainer) andNot(a container) container { switch x := a.(type) { case *arrayContainer: return ac.andNotArray(x) case *bitmapContainer: return ac.andNotBitmap(x) case *runContainer16: return ac.andNotRun16(x) } panic("unsupported container type") } func (ac *arrayContainer) andNotRun16(rc *runContainer16) container { acb := ac.toBitmapContainer() rcb := rc.toBitmapContainer() return acb.andNotBitmap(rcb) } func (ac *arrayContainer) iandNot(a container) container { switch x := a.(type) { case *arrayContainer: return ac.iandNotArray(x) case *bitmapContainer: return ac.iandNotBitmap(x) case *runContainer16: return ac.iandNotRun16(x) } panic("unsupported container type") } func (ac *arrayContainer) iandNotRun16(rc *runContainer16) container { rcb := rc.toBitmapContainer() acb := ac.toBitmapContainer() acb.iandNotBitmapSurely(rcb) *ac = *(acb.toArrayContainer()) return ac } func (ac *arrayContainer) andNotArray(value2 *arrayContainer) container { value1 := ac desiredcapacity := value1.getCardinality() answer := newArrayContainerCapacity(desiredcapacity) length := difference(value1.content, value2.content, answer.content) answer.content = answer.content[:length] return answer } func (ac *arrayContainer) iandNotArray(value2 *arrayContainer) container { length := difference(ac.content, value2.content, ac.content) ac.content = ac.content[:length] return ac } func (ac 
*arrayContainer) andNotBitmap(value2 *bitmapContainer) container { desiredcapacity := ac.getCardinality() answer := newArrayContainerCapacity(desiredcapacity) answer.content = answer.content[:desiredcapacity] pos := 0 for _, v := range ac.content { answer.content[pos] = v pos += 1 - int(value2.bitValue(v)) } answer.content = answer.content[:pos] return answer } func (ac *arrayContainer) andBitmap(value2 *bitmapContainer) container { desiredcapacity := ac.getCardinality() answer := newArrayContainerCapacity(desiredcapacity) answer.content = answer.content[:desiredcapacity] pos := 0 for _, v := range ac.content { answer.content[pos] = v pos += int(value2.bitValue(v)) } answer.content = answer.content[:pos] return answer } func (ac *arrayContainer) iandNotBitmap(value2 *bitmapContainer) container { pos := 0 for _, v := range ac.content { ac.content[pos] = v pos += 1 - int(value2.bitValue(v)) } ac.content = ac.content[:pos] return ac } func copyOf(array []uint16, size int) []uint16 { result := make([]uint16, size) for i, x := range array { if i == size { break } result[i] = x } return result } // flip the values in the range [firstOfRange,endx) func (ac *arrayContainer) inot(firstOfRange, endx int) container { if firstOfRange >= endx { return ac } return ac.inotClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1] } // flip the values in the range [firstOfRange,lastOfRange] func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] return ac } // determine the span of array indices to be affected startIndex := binarySearch(ac.content, uint16(firstOfRange)) if startIndex < 0 { startIndex = -startIndex - 1 } lastIndex := binarySearch(ac.content, uint16(lastOfRange)) if lastIndex < 0 { lastIndex = -lastIndex - 1 - 1 } currentValuesInRange := lastIndex - startIndex + 1 spanToBeFlipped := lastOfRange - firstOfRange + 1 
newValuesInRange := spanToBeFlipped - currentValuesInRange buffer := make([]uint16, newValuesInRange) cardinalityChange := newValuesInRange - currentValuesInRange newCardinality := len(ac.content) + cardinalityChange if cardinalityChange > 0 { if newCardinality > len(ac.content) { if newCardinality > arrayDefaultMaxSize { bcRet := ac.toBitmapContainer() bcRet.inot(firstOfRange, lastOfRange+1) *ac = *bcRet.toArrayContainer() return bcRet } ac.content = copyOf(ac.content, newCardinality) } base := lastIndex + 1 copy(ac.content[lastIndex+1+cardinalityChange:], ac.content[base:base+len(ac.content)-1-lastIndex]) ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1) } else { // no expansion needed ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1) if cardinalityChange < 0 { for i := startIndex + newValuesInRange; i < newCardinality; i++ { ac.content[i] = ac.content[i-cardinalityChange] } } } ac.content = ac.content[:newCardinality] return ac } func (ac *arrayContainer) negateRange(buffer []uint16, startIndex, lastIndex, startRange, lastRange int) { // compute the negation into buffer outPos := 0 inPos := startIndex // value here always >= valInRange, // until it is exhausted // n.b., we can start initially exhausted. 
valInRange := startRange for ; valInRange < lastRange && inPos <= lastIndex; valInRange++ { if uint16(valInRange) != ac.content[inPos] { buffer[outPos] = uint16(valInRange) outPos++ } else { inPos++ } } // if there are extra items (greater than the biggest // pre-existing one in range), buffer them for ; valInRange < lastRange; valInRange++ { buffer[outPos] = uint16(valInRange) outPos++ } if outPos != len(buffer) { panic("negateRange: internal bug") } for i, item := range buffer { ac.content[i+startIndex] = item } } func (ac *arrayContainer) isFull() bool { return false } func (ac *arrayContainer) andArray(value2 *arrayContainer) container { desiredcapacity := minOfInt(ac.getCardinality(), value2.getCardinality()) answer := newArrayContainerCapacity(desiredcapacity) length := intersection2by2( ac.content, value2.content, answer.content) answer.content = answer.content[:length] return answer } func (ac *arrayContainer) andArrayCardinality(value2 *arrayContainer) int { return intersection2by2Cardinality( ac.content, value2.content) } func (ac *arrayContainer) intersectsArray(value2 *arrayContainer) bool { return intersects2by2( ac.content, value2.content) } func (ac *arrayContainer) iandArray(value2 *arrayContainer) container { length := intersection2by2( ac.content, value2.content, ac.content) ac.content = ac.content[:length] return ac } func (ac *arrayContainer) getCardinality() int { return len(ac.content) } func (ac *arrayContainer) rank(x uint16) int { answer := binarySearch(ac.content, x) if answer >= 0 { return answer + 1 } return -answer - 1 } func (ac *arrayContainer) selectInt(x uint16) int { return int(ac.content[x]) } func (ac *arrayContainer) clone() container { ptr := arrayContainer{make([]uint16, len(ac.content))} copy(ptr.content, ac.content[:]) return &ptr } func (ac *arrayContainer) contains(x uint16) bool { return binarySearch(ac.content, x) >= 0 } func (ac *arrayContainer) loadData(bitmapContainer *bitmapContainer) { ac.content = make([]uint16, 
bitmapContainer.cardinality, bitmapContainer.cardinality) bitmapContainer.fillArray(ac.content) } func newArrayContainer() *arrayContainer { p := new(arrayContainer) return p } func newArrayContainerFromBitmap(bc *bitmapContainer) *arrayContainer { ac := &arrayContainer{} ac.loadData(bc) return ac } func newArrayContainerCapacity(size int) *arrayContainer { p := new(arrayContainer) p.content = make([]uint16, 0, size) return p } func newArrayContainerSize(size int) *arrayContainer { p := new(arrayContainer) p.content = make([]uint16, size, size) return p } func newArrayContainerRange(firstOfRun, lastOfRun int) *arrayContainer { valuesInRange := lastOfRun - firstOfRun + 1 this := newArrayContainerCapacity(valuesInRange) for i := 0; i < valuesInRange; i++ { this.content = append(this.content, uint16(firstOfRun+i)) } return this } func (ac *arrayContainer) numberOfRuns() (nr int) { n := len(ac.content) var runlen uint16 var cur, prev uint16 switch n { case 0: return 0 case 1: return 1 default: for i := 1; i < n; i++ { prev = ac.content[i-1] cur = ac.content[i] if cur == prev+1 { runlen++ } else { if cur < prev { panic("then fundamental arrayContainer assumption of sorted ac.content was broken") } if cur == prev { panic("then fundamental arrayContainer assumption of deduplicated content was broken") } else { nr++ runlen = 0 } } } nr++ } return } // convert to run or array *if needed* func (ac *arrayContainer) toEfficientContainer() container { numRuns := ac.numberOfRuns() sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns) sizeAsBitmapContainer := bitmapContainerSizeInBytes() card := ac.getCardinality() sizeAsArrayContainer := arrayContainerSizeInBytes(card) if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { return newRunContainer16FromArray(ac) } if card <= arrayDefaultMaxSize { return ac } return ac.toBitmapContainer() } func (ac *arrayContainer) containerType() contype { return arrayContype } func (ac *arrayContainer) 
addOffset(x uint16) []container { low := &arrayContainer{} high := &arrayContainer{} for _, val := range ac.content { y := uint32(val) + uint32(x) if highbits(y) > 0 { high.content = append(high.content, lowbits(y)) } else { low.content = append(low.content, lowbits(y)) } } return []container{low, high} } roaring-0.4.21/arraycontainer_gen.go 0000664 0000000 0000000 00000005573 13542657257 0017472 0 ustar 00root root 0000000 0000000 package roaring // NOTE: THIS FILE WAS PRODUCED BY THE // MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) // DO NOT EDIT import "github.com/tinylib/msgp/msgp" // Deprecated: DecodeMsg implements msgp.Decodable func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field var zbzg uint32 zbzg, err = dc.ReadMapHeader() if err != nil { return } for zbzg > 0 { zbzg-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "content": var zbai uint32 zbai, err = dc.ReadArrayHeader() if err != nil { return } if cap(z.content) >= int(zbai) { z.content = (z.content)[:zbai] } else { z.content = make([]uint16, zbai) } for zxvk := range z.content { z.content[zxvk], err = dc.ReadUint16() if err != nil { return } } default: err = dc.Skip() if err != nil { return } } } return } // Deprecated: EncodeMsg implements msgp.Encodable func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 1 // write "content" err = en.Append(0x81, 0xa7, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74) if err != nil { return err } err = en.WriteArrayHeader(uint32(len(z.content))) if err != nil { return } for zxvk := range z.content { err = en.WriteUint16(z.content[zxvk]) if err != nil { return } } return } // Deprecated: MarshalMsg implements msgp.Marshaler func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 1 // string "content" o = append(o, 0x81, 0xa7, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74) o = 
msgp.AppendArrayHeader(o, uint32(len(z.content))) for zxvk := range z.content { o = msgp.AppendUint16(o, z.content[zxvk]) } return } // Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field var zcmr uint32 zcmr, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zcmr > 0 { zcmr-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "content": var zajw uint32 zajw, bts, err = msgp.ReadArrayHeaderBytes(bts) if err != nil { return } if cap(z.content) >= int(zajw) { z.content = (z.content)[:zajw] } else { z.content = make([]uint16, zajw) } for zxvk := range z.content { z.content[zxvk], bts, err = msgp.ReadUint16Bytes(bts) if err != nil { return } } default: bts, err = msgp.Skip(bts) if err != nil { return } } } o = bts return } // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *arrayContainer) Msgsize() (s int) { s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size)) return } roaring-0.4.21/arraycontainer_gen_test.go 0000664 0000000 0000000 00000004575 13542657257 0020532 0 ustar 00root root 0000000 0000000 package roaring // NOTE: THIS FILE WAS PRODUCED BY THE // MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) // DO NOT EDIT import ( "bytes" "testing" "github.com/tinylib/msgp/msgp" ) func TestMarshalUnmarshalarrayContainer(t *testing.T) { v := arrayContainer{} bts, err := v.MarshalMsg(nil) if err != nil { t.Fatal(err) } left, err := v.UnmarshalMsg(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) } left, err = msgp.Skip(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after Skip(): %q", len(left), left) } } func BenchmarkMarshalMsgarrayContainer(b *testing.B) { v := arrayContainer{} b.ReportAllocs() 
b.ResetTimer() for i := 0; i < b.N; i++ { v.MarshalMsg(nil) } } func BenchmarkAppendMsgarrayContainer(b *testing.B) { v := arrayContainer{} bts := make([]byte, 0, v.Msgsize()) bts, _ = v.MarshalMsg(bts[0:0]) b.SetBytes(int64(len(bts))) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { bts, _ = v.MarshalMsg(bts[0:0]) } } func BenchmarkUnmarshalarrayContainer(b *testing.B) { v := arrayContainer{} bts, _ := v.MarshalMsg(nil) b.ReportAllocs() b.SetBytes(int64(len(bts))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := v.UnmarshalMsg(bts) if err != nil { b.Fatal(err) } } } func TestEncodeDecodearrayContainer(t *testing.T) { v := arrayContainer{} var buf bytes.Buffer msgp.Encode(&buf, &v) m := v.Msgsize() if buf.Len() > m { t.Logf("WARNING: Msgsize() for %v is inaccurate", v) } vn := arrayContainer{} err := msgp.Decode(&buf, &vn) if err != nil { t.Error(err) } buf.Reset() msgp.Encode(&buf, &v) err = msgp.NewReader(&buf).Skip() if err != nil { t.Error(err) } } func BenchmarkEncodearrayContainer(b *testing.B) { v := arrayContainer{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) en := msgp.NewWriter(msgp.Nowhere) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.EncodeMsg(en) } en.Flush() } func BenchmarkDecodearrayContainer(b *testing.B) { v := arrayContainer{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) rd := msgp.NewEndlessReader(buf.Bytes(), b) dc := msgp.NewReader(rd) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { err := v.DecodeMsg(dc) if err != nil { b.Fatal(err) } } } roaring-0.4.21/arraycontainer_test.go 0000664 0000000 0000000 00000021157 13542657257 0017674 0 ustar 00root root 0000000 0000000 package roaring // to run just these tests: go test -run TestArrayContainer* import ( "math/rand" "testing" "github.com/stretchr/testify/assert" ) func TestArrayContainerTransition(t *testing.T) { v := container(newArrayContainer()) for i := 0; i < arrayDefaultMaxSize; i++ { v = 
v.iaddReturnMinimized(uint16(i)) } assert.Equal(t, arrayDefaultMaxSize, v.getCardinality()) assert.IsType(t, newArrayContainer(), v) for i := 0; i < arrayDefaultMaxSize; i++ { v = v.iaddReturnMinimized(uint16(i)) } assert.Equal(t, arrayDefaultMaxSize, v.getCardinality()) assert.IsType(t, newArrayContainer(), v) v = v.iaddReturnMinimized(uint16(arrayDefaultMaxSize)) assert.Equal(t, arrayDefaultMaxSize+1, v.getCardinality()) assert.IsType(t, newBitmapContainer(), v) v = v.iremoveReturnMinimized(uint16(arrayDefaultMaxSize)) assert.Equal(t, arrayDefaultMaxSize, v.getCardinality()) assert.IsType(t, newArrayContainer(), v) } func TestArrayContainerRank(t *testing.T) { v := container(newArrayContainer()) v = v.iaddReturnMinimized(10) v = v.iaddReturnMinimized(100) v = v.iaddReturnMinimized(1000) assert.Equal(t, 3, v.getCardinality()) for i := 0; i <= arrayDefaultMaxSize; i++ { thisrank := v.rank(uint16(i)) if i < 10 { assert.Equalf(t, 0, thisrank, "At %d should be zero but is %d", i, thisrank) } else if i < 100 { assert.Equalf(t, 1, thisrank, "At %d should be one but is %d", i, thisrank) } else if i < 1000 { assert.Equalf(t, 2, thisrank, "At %d should be two but is %d", i, thisrank) } else { assert.Equalf(t, 3, thisrank, "At %d should be three but is %d", i, thisrank) } } } func TestArrayOffset(t *testing.T) { nums := []uint16{10, 100, 1000} expected := make([]int, len(nums)) offtest := uint16(65000) v := container(newArrayContainer()) for i, n := range nums { v = v.iaddReturnMinimized(n) expected[i] = int(n) + int(offtest) } w := v.addOffset(offtest) w0card := w[0].getCardinality() w1card := w[1].getCardinality() assert.Equal(t, 3, w0card+w1card) wout := make([]int, len(nums)) for i := 0; i < w0card; i++ { wout[i] = w[0].selectInt(uint16(i)) } for i := 0; i < w1card; i++ { wout[i+w0card] = w[1].selectInt(uint16(i)) + 65536 } for i, x := range wout { assert.Equal(t, expected[i], x) } } func TestArrayContainerMassiveSetAndGet(t *testing.T) { v := 
container(newArrayContainer()) for j := 0; j <= arrayDefaultMaxSize; j++ { v = v.iaddReturnMinimized(uint16(j)) assert.Equal(t, 1+j, v.getCardinality()) success := true i := 0 for ; i <= arrayDefaultMaxSize && success; i++ { if i <= j { success = v.contains(uint16(i)) } else { success = !v.contains(uint16(i)) } } assert.Truef(t, success, "failed at %d iteration", i) } } func TestArrayContainerUnsupportedType(t *testing.T) { a := container(newArrayContainer()) testContainerPanics(t, a) b := container(newBitmapContainer()) testContainerPanics(t, b) } func testContainerPanics(t *testing.T, c container) { f := &struct { arrayContainer }{} assert.Panics(t, func() { c.or(f) }) assert.Panics(t, func() { c.ior(f) }) assert.Panics(t, func() { c.lazyIOR(f) }) assert.Panics(t, func() { c.lazyOR(f) }) assert.Panics(t, func() { c.and(f) }) assert.Panics(t, func() { c.intersects(f) }) assert.Panics(t, func() { c.iand(f) }) assert.Panics(t, func() { c.xor(f) }) assert.Panics(t, func() { c.andNot(f) }) assert.Panics(t, func() { c.iandNot(f) }) } func TestArrayContainerNumberOfRuns025(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 1000, percentFill: .1, ntrial: 10}, /* trial{n: 100, percentFill: .5, ntrial: 10}, trial{n: 100, percentFill: .01, ntrial: 10}, trial{n: 100, percentFill: .99, ntrial: 10}, */ } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) n := tr.n a := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true } // RunContainer computes this automatically rc := newRunContainer16FromVals(false, a...) rcNr := rc.numberOfRuns() // vs arrayContainer ac := newArrayContainer() for k := range ma { ac.iadd(uint16(k)) } acNr := ac.numberOfRuns() assert.Equal(t, acNr, rcNr) // get coverage of arrayContainer coners... 
assert.Equal(t, 2*len(ma), ac.serializedSizeInBytes()) assert.NotPanics(t, func() { ac.iaddRange(2, 1) }) assert.NotPanics(t, func() { ac.iremoveRange(2, 1) }) ac.iremoveRange(0, 2) ac.iremoveRange(0, 2) delete(ma, 0) delete(ma, 1) assert.Equal(t, len(ma), ac.getCardinality()) ac.iadd(0) ac.iadd(1) ac.iadd(2) ma[0] = true ma[1] = true ma[2] = true newguy := ac.not(0, 3).(*arrayContainer) assert.False(t, newguy.contains(0)) assert.False(t, newguy.contains(1)) assert.False(t, newguy.contains(2)) newguy.notClose(0, 2) newguy.remove(2) newguy.remove(2) newguy.ior(ac) messedUp := newArrayContainer() assert.Equal(t, 0, messedUp.numberOfRuns()) // messed up messedUp.content = []uint16{1, 1} assert.Panics(t, func() { messedUp.numberOfRuns() }) messedUp.content = []uint16{2, 1} assert.Panics(t, func() { messedUp.numberOfRuns() }) shouldBeBit := newArrayContainer() for i := 0; i < arrayDefaultMaxSize+1; i++ { shouldBeBit.iadd(uint16(i * 2)) } bit := shouldBeBit.toEfficientContainer() _, isBit := bit.(*bitmapContainer) assert.True(t, isBit) } } for i := range trials { tester(trials[i]) } } func TestArrayContainerIaddRangeNearMax068(t *testing.T) { iv := []interval16{newInterval16Range(65525, 65527), newInterval16Range(65530, 65530), newInterval16Range(65534, 65535)} rc := newRunContainer16TakeOwnership(iv) ac2 := rc.toArrayContainer() assert.True(t, ac2.equals(rc)) assert.True(t, rc.equals(ac2)) ac := newArrayContainer() endx := int(MaxUint16) + 1 first := endx - 3 ac.iaddRange(first-20, endx-20) ac.iaddRange(first-6, endx-6) ac.iaddRange(first, endx) assert.Equal(t, 9, ac.getCardinality()) } func TestArrayContainerEtc070(t *testing.T) { iv := []interval16{newInterval16Range(65525, 65527), newInterval16Range(65530, 65530), newInterval16Range(65534, 65535)} rc := newRunContainer16TakeOwnership(iv) ac := rc.toArrayContainer() // not when nothing to do just returns a clone assert.True(t, ac.equals(ac.not(0, 0))) assert.True(t, ac.equals(ac.notClose(1, 0))) // not will promote to 
bitmapContainer if card is big enough ac = newArrayContainer() ac.inot(0, MaxUint16+1) rc = newRunContainer16Range(0, MaxUint16) assert.True(t, rc.equals(ac)) // comparing two array containers with different card ac2 := newArrayContainer() assert.False(t, ac2.equals(ac)) // comparing two arrays with same card but different content ac3 := newArrayContainer() ac4 := newArrayContainer() ac3.iadd(1) ac3.iadd(2) ac4.iadd(1) assert.False(t, ac3.equals(ac4)) // compare array vs other with different card assert.False(t, ac3.equals(rc)) // compare array vs other, same card, different content rc = newRunContainer16Range(0, 0) assert.False(t, ac4.equals(rc)) // remove from middle of array ac5 := newArrayContainer() ac5.iaddRange(0, 10) assert.Equal(t, 10, ac5.getCardinality()) ac6 := ac5.remove(5) assert.Equal(t, 9, ac6.getCardinality()) // lazyorArray that converts to bitmap ac5.iaddRange(0, arrayLazyLowerBound-1) ac6.iaddRange(arrayLazyLowerBound, 2*arrayLazyLowerBound-2) ac6a := ac6.(*arrayContainer) bc := ac5.lazyorArray(ac6a) _, isBitmap := bc.(*bitmapContainer) assert.True(t, isBitmap) // andBitmap ac = newArrayContainer() ac.iaddRange(0, 10) bc9 := newBitmapContainer() bc9.iaddRange(0, 5) and := ac.andBitmap(bc9) assert.Equal(t, 5, and.getCardinality()) // numberOfRuns with 1 member ac10 := newArrayContainer() ac10.iadd(1) assert.Equal(t, 1, ac10.numberOfRuns()) } func TestArrayContainerIand(t *testing.T) { a := NewBitmap() a.AddRange(0, 200000) b := BitmapOf(50, 100000, 150000) b.And(a) r := b.ToArray() assert.Len(t, r, 3) assert.EqualValues(t, 50, r[0]) assert.EqualValues(t, 100000, r[1]) assert.EqualValues(t, 150000, r[2]) } func TestArrayIteratorPeekNext(t *testing.T) { testContainerIteratorPeekNext(t, newArrayContainer()) } func TestArrayIteratorAdvance(t *testing.T) { testContainerIteratorAdvance(t, newArrayContainer()) } // go test -bench BenchmarkShortIteratorAdvance -run - func BenchmarkShortIteratorAdvanceArray(b *testing.B) { 
benchmarkContainerIteratorAdvance(b, newArrayContainer()) } // go test -bench BenchmarkShortIteratorNext -run - func BenchmarkShortIteratorNextArray(b *testing.B) { benchmarkContainerIteratorNext(b, newArrayContainer()) } roaring-0.4.21/benchmark_test.go 0000664 0000000 0000000 00000041316 13542657257 0016604 0 ustar 00root root 0000000 0000000 package roaring import ( "bytes" "fmt" "math/rand" "runtime" "testing" "github.com/willf/bitset" ) // BENCHMARKS, to run them type "go test -bench Benchmark -run -" // go test -bench BenchmarkOrs -benchmem -run - func BenchmarkOrs(b *testing.B) { bms := []*Bitmap{} maxCount := 50 domain := 100000000 bitmapCount := 100 for i := 0; i < bitmapCount; i++ { newBm := NewBitmap() count := rand.Intn(maxCount) + 5 for j := 0; j < count; j++ { v := uint32(rand.Intn(domain)) newBm.Add(v) } bms = append(bms, newBm) } var twotwocard uint64 var fastcard uint64 var nextcard uint64 b.Run("two-by-two", func(b *testing.B) { for n := 0; n < b.N; n++ { newBm := NewBitmap() for _, bm := range bms { newBm.Or(bm) } twotwocard = newBm.GetCardinality() } b.StopTimer() }) b.Run("fast", func(b *testing.B) { for n := 0; n < b.N; n++ { newBm := FastOr(bms...) 
fastcard = newBm.GetCardinality() } b.StopTimer() }) b.Run("next/add", func(b *testing.B) { buf := make([]uint32, 100) for n := 0; n < b.N; n++ { newBm := NewBitmap() for _, bm := range bms { iter := bm.ManyIterator() for vs := iter.NextMany(buf); vs != 0; vs = iter.NextMany(buf) { newBm.AddMany(buf[:vs]) } } nextcard = newBm.GetCardinality() } b.StopTimer() }) if fastcard != nextcard { b.Fatalf("Cardinalities don't match: %d, %d", fastcard, nextcard) } if fastcard != twotwocard { b.Fatalf("Cardinalities don't match: %d, %d", fastcard, twotwocard) } } var Rb *Bitmap func BenchmarkNewBitmap(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { Rb = New() } } var emptyArray []byte func BenchmarkEmptyArray(b *testing.B) { for i := 0; i < b.N; i++ { emptyArray = make([]byte, 0) } } var c9 uint // go test -bench BenchmarkMemoryUsage -run - func BenchmarkMemoryUsage(b *testing.B) { b.StopTimer() bitmaps := make([]*Bitmap, 0, 10) incr := uint32(1 << 16) max := uint32(1<<32 - 1) for x := 0; x < 10; x++ { rb := NewBitmap() var i uint32 for i = 0; i <= max-incr; i += incr { rb.Add(i) } bitmaps = append(bitmaps, rb) } var stats runtime.MemStats runtime.ReadMemStats(&stats) b.Logf("HeapInUse: %d, HeapObjects: %d", stats.HeapInuse, stats.HeapObjects) b.StartTimer() } // go test -bench BenchmarkIntersection -run - func BenchmarkIntersectionBitset(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s1 := bitset.New(0) sz := 150000 initsize := 65000 for i := 0; i < initsize; i++ { s1.Set(uint(r.Int31n(int32(sz)))) } s2 := bitset.New(0) sz = 100000000 initsize = 65000 for i := 0; i < initsize; i++ { s2.Set(uint(r.Int31n(int32(sz)))) } b.StartTimer() card := uint(0) for j := 0; j < b.N; j++ { s3 := s1.Intersection(s2) card = card + s3.Count() } } // go test -bench BenchmarkIntersection -run - func BenchmarkIntersectionRoaring(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s1 := NewBitmap() sz := 150000 initsize := 65000 for i := 0; i < initsize; i++ 
{ s1.Add(uint32(r.Int31n(int32(sz)))) } s2 := NewBitmap() sz = 100000000 initsize = 65000 for i := 0; i < initsize; i++ { s2.Add(uint32(r.Int31n(int32(sz)))) } b.StartTimer() card := uint64(0) for j := 0; j < b.N; j++ { s3 := And(s1, s2) card = card + s3.GetCardinality() } } // go test -bench BenchmarkIntersectionCardinalityRoaring -run - func BenchmarkIntersectionCardinalityRoaring(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s1 := NewBitmap() sz := 150000 initsize := 65000 for i := 0; i < initsize; i++ { s1.Add(uint32(r.Int31n(int32(sz)))) } s2 := NewBitmap() sz = 100000000 initsize = 65000 for i := 0; i < initsize; i++ { s2.Add(uint32(r.Int31n(int32(sz)))) } b.StartTimer() card := uint64(0) for j := 0; j < b.N; j++ { card += s1.AndCardinality(s2) } } // go test -bench BenchmarkUnion -run - func BenchmarkUnionBitset(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s1 := bitset.New(0) sz := 150000 initsize := 65000 for i := 0; i < initsize; i++ { s1.Set(uint(r.Int31n(int32(sz)))) } s2 := bitset.New(0) sz = 100000000 initsize = 65000 for i := 0; i < initsize; i++ { s2.Set(uint(r.Int31n(int32(sz)))) } b.StartTimer() card := uint(0) for j := 0; j < b.N; j++ { s3 := s1.Union(s2) card = card + s3.Count() } } // go test -bench BenchmarkUnion -run - func BenchmarkUnionRoaring(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s1 := NewBitmap() sz := 150000 initsize := 65000 for i := 0; i < initsize; i++ { s1.Add(uint32(r.Int31n(int32(sz)))) } s2 := NewBitmap() sz = 100000000 initsize = 65000 for i := 0; i < initsize; i++ { s2.Add(uint32(r.Int31n(int32(sz)))) } b.StartTimer() card := uint64(0) for j := 0; j < b.N; j++ { s3 := Or(s1, s2) card = card + s3.GetCardinality() } } // go test -bench BenchmarkSize -run - func BenchmarkSizeBitset(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s1 := bitset.New(0) sz := 150000 initsize := 65000 for i := 0; i < initsize; i++ { s1.Set(uint(r.Int31n(int32(sz)))) } s2 := 
bitset.New(0) sz = 100000000 initsize = 65000 for i := 0; i < initsize; i++ { s2.Set(uint(r.Int31n(int32(sz)))) } fmt.Printf("%.1f MB ", float32(s1.BinaryStorageSize()+s2.BinaryStorageSize())/(1024.0*1024)) } // go test -bench BenchmarkSize -run - func BenchmarkSizeRoaring(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s1 := NewBitmap() sz := 150000 initsize := 65000 for i := 0; i < initsize; i++ { s1.Add(uint32(r.Int31n(int32(sz)))) } s2 := NewBitmap() sz = 100000000 initsize = 65000 for i := 0; i < initsize; i++ { s2.Add(uint32(r.Int31n(int32(sz)))) } fmt.Printf("%.1f MB ", float32(s1.GetSerializedSizeInBytes()+s2.GetSerializedSizeInBytes())/(1024.0*1024)) } // go test -bench BenchmarkSet -run - func BenchmarkSetRoaring(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) sz := 1000000 s := NewBitmap() b.StartTimer() for i := 0; i < b.N; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } } func BenchmarkSetBitset(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) sz := 1000000 s := bitset.New(0) b.StartTimer() for i := 0; i < b.N; i++ { s.Set(uint(r.Int31n(int32(sz)))) } } // go test -bench BenchmarkGetTest -run - func BenchmarkGetTestRoaring(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) sz := 1000000 initsize := 50000 s := NewBitmap() for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } b.StartTimer() for i := 0; i < b.N; i++ { s.Contains(uint32(r.Int31n(int32(sz)))) } } func BenchmarkGetTestBitSet(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) sz := 1000000 initsize := 50000 s := bitset.New(0) for i := 0; i < initsize; i++ { s.Set(uint(r.Int31n(int32(sz)))) } b.StartTimer() for i := 0; i < b.N; i++ { s.Test(uint(r.Int31n(int32(sz)))) } } // go test -bench BenchmarkCount -run - func BenchmarkCountRoaring(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 1000000 initsize := 50000 for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } 
b.StartTimer() for i := 0; i < b.N; i++ { s.GetCardinality() } } func BenchmarkCountBitset(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s := bitset.New(0) sz := 1000000 initsize := 50000 for i := 0; i < initsize; i++ { s.Set(uint(r.Int31n(int32(sz)))) } b.StartTimer() for i := 0; i < b.N; i++ { s.Count() } } // go test -bench BenchmarkIterate -run - func BenchmarkIterateRoaring(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 150000 initsize := 65000 for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } b.StartTimer() for j := 0; j < b.N; j++ { c9 = uint(0) i := s.Iterator() for i.HasNext() { i.Next() c9++ } } } // go test -bench BenchmarkSparseIterate -run - func BenchmarkSparseIterateRoaring(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 100000000 initsize := 65000 for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } b.StartTimer() for j := 0; j < b.N; j++ { c9 = uint(0) i := s.Iterator() for i.HasNext() { i.Next() c9++ } } } // go test -bench BenchmarkSparseAdvance -run - func BenchmarkSparseAdvanceRoaring(b *testing.B) { b.StopTimer() s := NewBitmap() initsize := 65000 for i := 0; i < initsize; i++ { s.Add(uint32(i)) } for _, gap := range []int{1, 2, 65, 650} { b.Run(fmt.Sprintf("advance from %d", gap), func(b *testing.B) { b.ReportAllocs() b.StartTimer() diff := uint32(0) for n := 0; n < b.N; n++ { val := uint32((gap * n) % initsize) i := s.Iterator() i.AdvanceIfNeeded(val) diff += i.PeekNext() - val } b.StopTimer() if diff != 0 { b.Fatalf("Expected diff 0, got %d", diff) } }) } } // go test -bench BenchmarkSparseAdvance -run - func BenchmarkSparseAdvanceOnHugeData(b *testing.B) { b.ReportAllocs() s := NewBitmap() initsize := 6500000 sz := 100000000 r := rand.New(rand.NewSource(0)) for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } b.ResetTimer() for n := 0; n < b.N; n++ { val := uint32(n) i := s.Iterator() 
i.AdvanceIfNeeded(val) } } // go test -bench BenchmarkSparseAdvance -run - func BenchmarkSparseAdvanceSequentially(b *testing.B) { b.StopTimer() s := NewBitmap() initsize := 65000 for i := 0; i < initsize; i++ { s.Add(uint32(i)) } for _, gap := range []int{1, 2, 65, 650} { b.Run(fmt.Sprintf("advance from %d", gap), func(b *testing.B) { b.ReportAllocs() b.StartTimer() diff := uint32(0) for n := 0; n < b.N; n++ { val := uint32((gap * n) % initsize) i := s.Iterator() for i.HasNext() && i.PeekNext() < val { i.Next() } diff += i.PeekNext() - val } b.StopTimer() if diff != 0 { b.Fatalf("Expected diff 0, got %d", diff) } }) } } // go test -bench BenchmarkIterate -run - func BenchmarkIterateBitset(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s := bitset.New(0) sz := 150000 initsize := 65000 for i := 0; i < initsize; i++ { s.Set(uint(r.Int31n(int32(sz)))) } b.StartTimer() for j := 0; j < b.N; j++ { c9 = uint(0) for i, e := s.NextSet(0); e; i, e = s.NextSet(i + 1) { c9++ } } } // go test -bench BenchmarkSparseContains -run - func BenchmarkSparseContains(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 10000000 initsize := 65000 for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } var a [1024]uint32 for i := 0; i < 1024; i++ { a[i] = uint32(r.Int31n(int32(sz))) } b.StartTimer() for j := 0; j < b.N; j++ { c9 = uint(0) for i := 0; i < 1024; i++ { if s.Contains(a[i]) { c9++ } } } } // go test -bench BenchmarkSparseIterate -run - func BenchmarkSparseIterateBitset(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s := bitset.New(0) sz := 100000000 initsize := 65000 for i := 0; i < initsize; i++ { s.Set(uint(r.Int31n(int32(sz)))) } b.StartTimer() for j := 0; j < b.N; j++ { c9 = uint(0) for i, e := s.NextSet(0); e; i, e = s.NextSet(i + 1) { c9++ } } } func BenchmarkSerializationSparse(b *testing.B) { b.ReportAllocs() b.StopTimer() r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 100000000 
initsize := 65000 for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } buf := make([]byte, 0, s.GetSerializedSizeInBytes()) b.StartTimer() for j := 0; j < b.N; j++ { w := bytes.NewBuffer(buf[:0]) s.WriteTo(w) } } func BenchmarkSerializationMid(b *testing.B) { b.ReportAllocs() b.StopTimer() r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 10000000 initsize := 65000 for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } buf := make([]byte, 0, s.GetSerializedSizeInBytes()) b.StartTimer() for j := 0; j < b.N; j++ { w := bytes.NewBuffer(buf[:0]) s.WriteTo(w) } } func BenchmarkSerializationDense(b *testing.B) { b.ReportAllocs() b.StopTimer() r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 150000 initsize := 65000 for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } buf := make([]byte, 0, s.GetSerializedSizeInBytes()) b.StartTimer() for j := 0; j < b.N; j++ { w := bytes.NewBuffer(buf[:0]) s.WriteTo(w) } } func BenchmarkMarshalBinary(b *testing.B) { r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 10000000 initsize := 65000 for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } b.ReportAllocs() b.ResetTimer() for j := 0; j < b.N; j++ { s.MarshalBinary() } } func BenchmarkUnmarshalBinary(b *testing.B) { r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 10000000 initsize := 65000 for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } data, _ := s.MarshalBinary() b.ReportAllocs() b.ResetTimer() for j := 0; j < b.N; j++ { ub := NewBitmap() ub.UnmarshalBinary(data) } } func BenchmarkEqualsSparse(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s := NewBitmap() t := NewBitmap() sz := 100000000 initsize := 65000 for i := 0; i < initsize; i++ { n := uint32(r.Int31n(int32(sz))) s.Add(n) t.Add(n) } b.StartTimer() for j := 0; j < b.N; j++ { s.Equals(t) } } func BenchmarkEqualsClone(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 
100000000 initsize := 65000 for i := 0; i < initsize; i++ { s.Add(uint32(r.Int31n(int32(sz)))) } t := s.Clone() b.StartTimer() for j := 0; j < b.N; j++ { s.Equals(t) } } // go test -bench BenchmarkNexts -benchmem -run - func BenchmarkNexts(b *testing.B) { for _, gap := range []uint32{1, 2, 4, 8, 16, 32, 64, 256, 1024, 8096} { rrs := make([]uint32, 500000) v := uint32(0) for i := range rrs { rrs[i] = v v += gap } bm := NewBitmap() bm.AddMany(rrs) var totnext uint64 var totnextmany uint64 density := float32(100) / float32(gap) densityStr := fmt.Sprintf("__%f%%", density) b.Run("next"+densityStr, func(b *testing.B) { for n := 0; n < b.N; n++ { totnext = 0 iter := bm.Iterator() for iter.HasNext() { v := iter.Next() totnext += uint64(v) } } b.StopTimer() }) b.Run("nextmany"+densityStr, func(b *testing.B) { for n := 0; n < b.N; n++ { totnextmany = 0 iter := bm.ManyIterator() // worst case, in practice will reuse buffers across many roars buf := make([]uint32, 4096) for j := iter.NextMany(buf); j != 0; j = iter.NextMany(buf) { for i := 0; i < j; i++ { totnextmany += uint64(buf[i]) } } } b.StopTimer() }) if totnext != totnextmany { b.Fatalf("Cardinalities don't match: %d, %d", totnext, totnextmany) } } } // go test -bench BenchmarkRLENexts -benchmem -run - func BenchmarkNextsRLE(b *testing.B) { var totadd uint64 var totaddmany uint64 bm := NewBitmap() bm.AddRange(0, 1000000) b.Run("next", func(b *testing.B) { for n := 0; n < b.N; n++ { totadd = 0 iter := bm.Iterator() for iter.HasNext() { v := iter.Next() totadd += uint64(v) } } b.StopTimer() }) b.Run("nextmany", func(b *testing.B) { for n := 0; n < b.N; n++ { totaddmany = 0 iter := bm.ManyIterator() // worst case, in practice will reuse buffers across many roars buf := make([]uint32, 2048) for j := iter.NextMany(buf); j != 0; j = iter.NextMany(buf) { for i := 0; i < j; i++ { totaddmany += uint64(buf[i]) } } } b.StopTimer() }) if totadd != totaddmany { b.Fatalf("Cardinalities don't match: %d, %d", totadd, totaddmany) } } 
func BenchmarkXor(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 100000000 initsize := 65000 for i := 0; i < initsize; i++ { n := uint32(r.Int31n(int32(sz))) s.Add(n) } x2 := NewBitmap() for i := 0; i < initsize; i++ { n := uint32(r.Int31n(int32(sz))) x2.Add(n) } b.StartTimer() for j := 0; j < b.N; j++ { s.Clone().Xor(x2) } } func BenchmarkXorLopsided(b *testing.B) { b.StopTimer() r := rand.New(rand.NewSource(0)) s := NewBitmap() sz := 100000000 initsize := 65000 for i := 0; i < initsize; i++ { n := uint32(r.Int31n(int32(sz))) s.Add(n) } x2 := NewBitmap() for i := 0; i < 32; i++ { n := uint32(r.Int31n(int32(sz))) x2.Add(n) } b.StartTimer() for j := 0; j < b.N; j++ { s.Clone().Xor(x2) } } func BenchmarkBitmapReuseWithoutClear(b *testing.B) { for j := 0; j < b.N; j++ { s := NewBitmap() for i := 0; i < 100000; i++ { s.Add(uint32(i * 4096)) } } } func BenchmarkBitmapReuseWithClear(b *testing.B) { s := NewBitmap() for i := 0; i < 100000; i++ { s.Add(uint32(i * 4096)) } b.ResetTimer() for j := 0; j < b.N; j++ { s.Clear() // reuse the same bitmap for i := 0; i < 100000; i++ { s.Add(uint32(i * 4096)) } } } roaring-0.4.21/bitmapcontainer.go 0000664 0000000 0000000 00000063651 13542657257 0017000 0 ustar 00root root 0000000 0000000 package roaring import ( "fmt" "unsafe" ) //go:generate msgp -unexported type bitmapContainer struct { cardinality int bitmap []uint64 } func (bc bitmapContainer) String() string { var s string for it := bc.getShortIterator(); it.hasNext(); { s += fmt.Sprintf("%v, ", it.next()) } return s } func newBitmapContainer() *bitmapContainer { p := new(bitmapContainer) size := (1 << 16) / 64 p.bitmap = make([]uint64, size, size) return p } func newBitmapContainerwithRange(firstOfRun, lastOfRun int) *bitmapContainer { bc := newBitmapContainer() bc.cardinality = lastOfRun - firstOfRun + 1 if bc.cardinality == maxCapacity { fill(bc.bitmap, uint64(0xffffffffffffffff)) } else { firstWord := firstOfRun / 64 lastWord := 
lastOfRun / 64 zeroPrefixLength := uint64(firstOfRun & 63) zeroSuffixLength := uint64(63 - (lastOfRun & 63)) fillRange(bc.bitmap, firstWord, lastWord+1, uint64(0xffffffffffffffff)) bc.bitmap[firstWord] ^= ((uint64(1) << zeroPrefixLength) - 1) blockOfOnes := (uint64(1) << zeroSuffixLength) - 1 maskOnLeft := blockOfOnes << (uint64(64) - zeroSuffixLength) bc.bitmap[lastWord] ^= maskOnLeft } return bc } func (bc *bitmapContainer) minimum() uint16 { for i := 0; i < len(bc.bitmap); i++ { w := bc.bitmap[i] if w != 0 { r := countTrailingZeros(w) return uint16(r + i*64) } } return MaxUint16 } // i should be non-zero func clz(i uint64) int { n := 1 x := uint32(i >> 32) if x == 0 { n += 32 x = uint32(i) } if x>>16 == 0 { n += 16 x = x << 16 } if x>>24 == 0 { n += 8 x = x << 8 } if x>>28 == 0 { n += 4 x = x << 4 } if x>>30 == 0 { n += 2 x = x << 2 } return n - int(x>>31) } func (bc *bitmapContainer) maximum() uint16 { for i := len(bc.bitmap); i > 0; i-- { w := bc.bitmap[i-1] if w != 0 { r := clz(w) return uint16((i-1)*64 + 63 - r) } } return uint16(0) } type bitmapContainerShortIterator struct { ptr *bitmapContainer i int } func (bcsi *bitmapContainerShortIterator) next() uint16 { j := bcsi.i bcsi.i = bcsi.ptr.NextSetBit(bcsi.i + 1) return uint16(j) } func (bcsi *bitmapContainerShortIterator) hasNext() bool { return bcsi.i >= 0 } func (bcsi *bitmapContainerShortIterator) peekNext() uint16 { return uint16(bcsi.i) } func (bcsi *bitmapContainerShortIterator) advanceIfNeeded(minval uint16) { if bcsi.hasNext() && bcsi.peekNext() < minval { bcsi.i = bcsi.ptr.NextSetBit(int(minval)) } } func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator { return &bitmapContainerShortIterator{a, a.NextSetBit(0)} } func (bc *bitmapContainer) getShortIterator() shortPeekable { return newBitmapContainerShortIterator(bc) } type reverseBitmapContainerShortIterator struct { ptr *bitmapContainer i int } func (bcsi *reverseBitmapContainerShortIterator) next() uint16 { if 
bcsi.i == -1 { panic("reverseBitmapContainerShortIterator.next() going beyond what is available") } j := bcsi.i bcsi.i = bcsi.ptr.PrevSetBit(bcsi.i - 1) return uint16(j) } func (bcsi *reverseBitmapContainerShortIterator) hasNext() bool { return bcsi.i >= 0 } func newReverseBitmapContainerShortIterator(a *bitmapContainer) *reverseBitmapContainerShortIterator { if a.cardinality == 0 { return &reverseBitmapContainerShortIterator{a, -1} } return &reverseBitmapContainerShortIterator{a, int(a.maximum())} } func (bc *bitmapContainer) getReverseIterator() shortIterable { return newReverseBitmapContainerShortIterator(bc) } type bitmapContainerManyIterator struct { ptr *bitmapContainer base int bitset uint64 } func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int { n := 0 base := bcmi.base bitset := bcmi.bitset for n < len(buf) { if bitset == 0 { base++ if base >= len(bcmi.ptr.bitmap) { bcmi.base = base bcmi.bitset = bitset return n } bitset = bcmi.ptr.bitmap[base] continue } t := bitset & -bitset buf[n] = uint32(((base * 64) + int(popcount(t-1)))) | hs n = n + 1 bitset ^= t } bcmi.base = base bcmi.bitset = bitset return n } func newBitmapContainerManyIterator(a *bitmapContainer) *bitmapContainerManyIterator { return &bitmapContainerManyIterator{a, -1, 0} } func (bc *bitmapContainer) getManyIterator() manyIterable { return newBitmapContainerManyIterator(bc) } func (bc *bitmapContainer) getSizeInBytes() int { return len(bc.bitmap) * 8 // + bcBaseBytes } func (bc *bitmapContainer) serializedSizeInBytes() int { //return bc.Msgsize()// NOO! 
This breaks GetSerializedSizeInBytes return len(bc.bitmap) * 8 } const bcBaseBytes = int(unsafe.Sizeof(bitmapContainer{})) // bitmapContainer doesn't depend on card, always fully allocated func bitmapContainerSizeInBytes() int { return bcBaseBytes + (1<<16)/8 } func bitmapEquals(a, b []uint64) bool { if len(a) != len(b) { return false } for i, v := range a { if v != b[i] { return false } } return true } func (bc *bitmapContainer) fillLeastSignificant16bits(x []uint32, i int, mask uint32) { // TODO: should be written as optimized assembly pos := i base := mask for k := 0; k < len(bc.bitmap); k++ { bitset := bc.bitmap[k] for bitset != 0 { t := bitset & -bitset x[pos] = base + uint32(popcount(t-1)) pos++ bitset ^= t } base += 64 } } func (bc *bitmapContainer) equals(o container) bool { srb, ok := o.(*bitmapContainer) if ok { if srb.cardinality != bc.cardinality { return false } return bitmapEquals(bc.bitmap, srb.bitmap) } // use generic comparison if bc.getCardinality() != o.getCardinality() { return false } ait := o.getShortIterator() bit := bc.getShortIterator() for ait.hasNext() { if bit.next() != ait.next() { return false } } return true } func (bc *bitmapContainer) iaddReturnMinimized(i uint16) container { bc.iadd(i) if bc.isFull() { return newRunContainer16Range(0, MaxUint16) } return bc } func (bc *bitmapContainer) iadd(i uint16) bool { x := int(i) previous := bc.bitmap[x/64] mask := uint64(1) << (uint(x) % 64) newb := previous | mask bc.bitmap[x/64] = newb bc.cardinality += int((previous ^ newb) >> (uint(x) % 64)) return newb != previous } func (bc *bitmapContainer) iremoveReturnMinimized(i uint16) container { if bc.iremove(i) { if bc.cardinality == arrayDefaultMaxSize { return bc.toArrayContainer() } } return bc } // iremove returns true if i was found. 
func (bc *bitmapContainer) iremove(i uint16) bool { if bc.contains(i) { bc.cardinality-- bc.bitmap[i/64] &^= (uint64(1) << (i % 64)) return true } return false } func (bc *bitmapContainer) isFull() bool { return bc.cardinality == int(MaxUint16)+1 } func (bc *bitmapContainer) getCardinality() int { return bc.cardinality } func (bc *bitmapContainer) clone() container { ptr := bitmapContainer{bc.cardinality, make([]uint64, len(bc.bitmap))} copy(ptr.bitmap, bc.bitmap[:]) return &ptr } // add all values in range [firstOfRange,lastOfRange) func (bc *bitmapContainer) iaddRange(firstOfRange, lastOfRange int) container { bc.cardinality += setBitmapRangeAndCardinalityChange(bc.bitmap, firstOfRange, lastOfRange) return bc } // remove all values in range [firstOfRange,lastOfRange) func (bc *bitmapContainer) iremoveRange(firstOfRange, lastOfRange int) container { bc.cardinality += resetBitmapRangeAndCardinalityChange(bc.bitmap, firstOfRange, lastOfRange) if bc.getCardinality() <= arrayDefaultMaxSize { return bc.toArrayContainer() } return bc } // flip all values in range [firstOfRange,endx) func (bc *bitmapContainer) inot(firstOfRange, endx int) container { if endx-firstOfRange == maxCapacity { flipBitmapRange(bc.bitmap, firstOfRange, endx) bc.cardinality = maxCapacity - bc.cardinality } else if endx-firstOfRange > maxCapacity/2 { flipBitmapRange(bc.bitmap, firstOfRange, endx) bc.computeCardinality() } else { bc.cardinality += flipBitmapRangeAndCardinalityChange(bc.bitmap, firstOfRange, endx) } if bc.getCardinality() <= arrayDefaultMaxSize { return bc.toArrayContainer() } return bc } // flip all values in range [firstOfRange,endx) func (bc *bitmapContainer) not(firstOfRange, endx int) container { answer := bc.clone() return answer.inot(firstOfRange, endx) } func (bc *bitmapContainer) or(a container) container { switch x := a.(type) { case *arrayContainer: return bc.orArray(x) case *bitmapContainer: return bc.orBitmap(x) case *runContainer16: if x.isFull() { return x.clone() } 
return x.orBitmapContainer(bc) } panic("unsupported container type") } func (bc *bitmapContainer) orCardinality(a container) int { switch x := a.(type) { case *arrayContainer: return bc.orArrayCardinality(x) case *bitmapContainer: return bc.orBitmapCardinality(x) case *runContainer16: return x.orBitmapContainerCardinality(bc) } panic("unsupported container type") } func (bc *bitmapContainer) ior(a container) container { switch x := a.(type) { case *arrayContainer: return bc.iorArray(x) case *bitmapContainer: return bc.iorBitmap(x) case *runContainer16: if x.isFull() { return x.clone() } for i := range x.iv { bc.iaddRange(int(x.iv[i].start), int(x.iv[i].last())+1) } if bc.isFull() { return newRunContainer16Range(0, MaxUint16) } //bc.computeCardinality() return bc } panic(fmt.Errorf("unsupported container type %T", a)) } func (bc *bitmapContainer) lazyIOR(a container) container { switch x := a.(type) { case *arrayContainer: return bc.lazyIORArray(x) case *bitmapContainer: return bc.lazyIORBitmap(x) case *runContainer16: if x.isFull() { return x.clone() } // Manually inlined setBitmapRange function bitmap := bc.bitmap for _, iv := range x.iv { start := int(iv.start) end := int(iv.last()) + 1 if start >= end { continue } firstword := start / 64 endword := (end - 1) / 64 if firstword == endword { bitmap[firstword] |= (^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64)) continue } bitmap[firstword] |= ^uint64(0) << uint(start%64) for i := firstword + 1; i < endword; i++ { bitmap[i] = ^uint64(0) } bitmap[endword] |= ^uint64(0) >> (uint(-end) % 64) } bc.cardinality = invalidCardinality return bc } panic("unsupported container type") } func (bc *bitmapContainer) lazyOR(a container) container { switch x := a.(type) { case *arrayContainer: return bc.lazyORArray(x) case *bitmapContainer: return bc.lazyORBitmap(x) case *runContainer16: if x.isFull() { return x.clone() } // TODO: implement lazy OR return x.orBitmapContainer(bc) } panic("unsupported container type") 
} func (bc *bitmapContainer) orArray(value2 *arrayContainer) container { answer := bc.clone().(*bitmapContainer) c := value2.getCardinality() for k := 0; k < c; k++ { v := value2.content[k] i := uint(v) >> 6 bef := answer.bitmap[i] aft := bef | (uint64(1) << (v % 64)) answer.bitmap[i] = aft answer.cardinality += int((bef - aft) >> 63) } return answer } func (bc *bitmapContainer) orArrayCardinality(value2 *arrayContainer) int { answer := 0 c := value2.getCardinality() for k := 0; k < c; k++ { // branchless: v := value2.content[k] i := uint(v) >> 6 bef := bc.bitmap[i] aft := bef | (uint64(1) << (v % 64)) answer += int((bef - aft) >> 63) } return answer } func (bc *bitmapContainer) orBitmap(value2 *bitmapContainer) container { answer := newBitmapContainer() for k := 0; k < len(answer.bitmap); k++ { answer.bitmap[k] = bc.bitmap[k] | value2.bitmap[k] } answer.computeCardinality() if answer.isFull() { return newRunContainer16Range(0, MaxUint16) } return answer } func (bc *bitmapContainer) orBitmapCardinality(value2 *bitmapContainer) int { return int(popcntOrSlice(bc.bitmap, value2.bitmap)) } func (bc *bitmapContainer) andBitmapCardinality(value2 *bitmapContainer) int { return int(popcntAndSlice(bc.bitmap, value2.bitmap)) } func (bc *bitmapContainer) computeCardinality() { bc.cardinality = int(popcntSlice(bc.bitmap)) } func (bc *bitmapContainer) iorArray(ac *arrayContainer) container { for k := range ac.content { vc := ac.content[k] i := uint(vc) >> 6 bef := bc.bitmap[i] aft := bef | (uint64(1) << (vc % 64)) bc.bitmap[i] = aft bc.cardinality += int((bef - aft) >> 63) } if bc.isFull() { return newRunContainer16Range(0, MaxUint16) } return bc } func (bc *bitmapContainer) iorBitmap(value2 *bitmapContainer) container { answer := bc answer.cardinality = 0 for k := 0; k < len(answer.bitmap); k++ { answer.bitmap[k] = bc.bitmap[k] | value2.bitmap[k] } answer.computeCardinality() if bc.isFull() { return newRunContainer16Range(0, MaxUint16) } return answer } func (bc 
*bitmapContainer) lazyIORArray(value2 *arrayContainer) container { answer := bc c := value2.getCardinality() for k := 0; k+3 < c; k += 4 { content := (*[4]uint16)(unsafe.Pointer(&value2.content[k])) vc0 := content[0] i0 := uint(vc0) >> 6 answer.bitmap[i0] = answer.bitmap[i0] | (uint64(1) << (vc0 % 64)) vc1 := content[1] i1 := uint(vc1) >> 6 answer.bitmap[i1] = answer.bitmap[i1] | (uint64(1) << (vc1 % 64)) vc2 := content[2] i2 := uint(vc2) >> 6 answer.bitmap[i2] = answer.bitmap[i2] | (uint64(1) << (vc2 % 64)) vc3 := content[3] i3 := uint(vc3) >> 6 answer.bitmap[i3] = answer.bitmap[i3] | (uint64(1) << (vc3 % 64)) } for k := c &^ 3; k < c; k++ { vc := value2.content[k] i := uint(vc) >> 6 answer.bitmap[i] = answer.bitmap[i] | (uint64(1) << (vc % 64)) } answer.cardinality = invalidCardinality return answer } func (bc *bitmapContainer) lazyORArray(value2 *arrayContainer) container { answer := bc.clone().(*bitmapContainer) return answer.lazyIORArray(value2) } func (bc *bitmapContainer) lazyIORBitmap(value2 *bitmapContainer) container { answer := bc for k := 0; k < len(answer.bitmap); k++ { answer.bitmap[k] = bc.bitmap[k] | value2.bitmap[k] } bc.cardinality = invalidCardinality return answer } func (bc *bitmapContainer) lazyORBitmap(value2 *bitmapContainer) container { answer := bc.clone().(*bitmapContainer) return answer.lazyIORBitmap(value2) } func (bc *bitmapContainer) xor(a container) container { switch x := a.(type) { case *arrayContainer: return bc.xorArray(x) case *bitmapContainer: return bc.xorBitmap(x) case *runContainer16: return x.xorBitmap(bc) } panic("unsupported container type") } func (bc *bitmapContainer) xorArray(value2 *arrayContainer) container { answer := bc.clone().(*bitmapContainer) c := value2.getCardinality() for k := 0; k < c; k++ { vc := value2.content[k] index := uint(vc) >> 6 abi := answer.bitmap[index] mask := uint64(1) << (vc % 64) answer.cardinality += 1 - 2*int((abi&mask)>>(vc%64)) answer.bitmap[index] = abi ^ mask } if answer.cardinality <= 
arrayDefaultMaxSize { return answer.toArrayContainer() } return answer } func (bc *bitmapContainer) rank(x uint16) int { // TODO: rewrite in assembly leftover := (uint(x) + 1) & 63 if leftover == 0 { return int(popcntSlice(bc.bitmap[:(uint(x)+1)/64])) } return int(popcntSlice(bc.bitmap[:(uint(x)+1)/64]) + popcount(bc.bitmap[(uint(x)+1)/64]<<(64-leftover))) } func (bc *bitmapContainer) selectInt(x uint16) int { remaining := x for k := 0; k < len(bc.bitmap); k++ { w := popcount(bc.bitmap[k]) if uint16(w) > remaining { return k*64 + selectBitPosition(bc.bitmap[k], int(remaining)) } remaining -= uint16(w) } return -1 } func (bc *bitmapContainer) xorBitmap(value2 *bitmapContainer) container { newCardinality := int(popcntXorSlice(bc.bitmap, value2.bitmap)) if newCardinality > arrayDefaultMaxSize { answer := newBitmapContainer() for k := 0; k < len(answer.bitmap); k++ { answer.bitmap[k] = bc.bitmap[k] ^ value2.bitmap[k] } answer.cardinality = newCardinality if answer.isFull() { return newRunContainer16Range(0, MaxUint16) } return answer } ac := newArrayContainerSize(newCardinality) fillArrayXOR(ac.content, bc.bitmap, value2.bitmap) ac.content = ac.content[:newCardinality] return ac } func (bc *bitmapContainer) and(a container) container { switch x := a.(type) { case *arrayContainer: return bc.andArray(x) case *bitmapContainer: return bc.andBitmap(x) case *runContainer16: if x.isFull() { return bc.clone() } return x.andBitmapContainer(bc) } panic("unsupported container type") } func (bc *bitmapContainer) andCardinality(a container) int { switch x := a.(type) { case *arrayContainer: return bc.andArrayCardinality(x) case *bitmapContainer: return bc.andBitmapCardinality(x) case *runContainer16: return x.andBitmapContainerCardinality(bc) } panic("unsupported container type") } func (bc *bitmapContainer) intersects(a container) bool { switch x := a.(type) { case *arrayContainer: return bc.intersectsArray(x) case *bitmapContainer: return bc.intersectsBitmap(x) case 
*runContainer16: return x.intersects(bc) } panic("unsupported container type") } func (bc *bitmapContainer) iand(a container) container { switch x := a.(type) { case *arrayContainer: return bc.iandArray(x) case *bitmapContainer: return bc.iandBitmap(x) case *runContainer16: if x.isFull() { return bc.clone() } return bc.iandRun16(x) } panic("unsupported container type") } func (bc *bitmapContainer) iandRun16(rc *runContainer16) container { rcb := newBitmapContainerFromRun(rc) return bc.iandBitmap(rcb) } func (bc *bitmapContainer) iandArray(ac *arrayContainer) container { acb := ac.toBitmapContainer() return bc.iandBitmap(acb) } func (bc *bitmapContainer) andArray(value2 *arrayContainer) *arrayContainer { answer := newArrayContainerCapacity(len(value2.content)) answer.content = answer.content[:cap(answer.content)] c := value2.getCardinality() pos := 0 for k := 0; k < c; k++ { v := value2.content[k] answer.content[pos] = v pos += int(bc.bitValue(v)) } answer.content = answer.content[:pos] return answer } func (bc *bitmapContainer) andArrayCardinality(value2 *arrayContainer) int { c := value2.getCardinality() pos := 0 for k := 0; k < c; k++ { v := value2.content[k] pos += int(bc.bitValue(v)) } return pos } func (bc *bitmapContainer) getCardinalityInRange(start, end uint) int { if start >= end { return 0 } firstword := start / 64 endword := (end - 1) / 64 const allones = ^uint64(0) if firstword == endword { return int(popcount(bc.bitmap[firstword] & ((allones << (start % 64)) & (allones >> ((64 - end) & 63))))) } answer := popcount(bc.bitmap[firstword] & (allones << (start % 64))) answer += popcntSlice(bc.bitmap[firstword+1 : endword]) answer += popcount(bc.bitmap[endword] & (allones >> ((64 - end) & 63))) return int(answer) } func (bc *bitmapContainer) andBitmap(value2 *bitmapContainer) container { newcardinality := int(popcntAndSlice(bc.bitmap, value2.bitmap)) if newcardinality > arrayDefaultMaxSize { answer := newBitmapContainer() for k := 0; k < len(answer.bitmap); 
k++ { answer.bitmap[k] = bc.bitmap[k] & value2.bitmap[k] } answer.cardinality = newcardinality return answer } ac := newArrayContainerSize(newcardinality) fillArrayAND(ac.content, bc.bitmap, value2.bitmap) ac.content = ac.content[:newcardinality] //not sure why i need this return ac } func (bc *bitmapContainer) intersectsArray(value2 *arrayContainer) bool { c := value2.getCardinality() for k := 0; k < c; k++ { v := value2.content[k] if bc.contains(v) { return true } } return false } func (bc *bitmapContainer) intersectsBitmap(value2 *bitmapContainer) bool { for k := 0; k < len(bc.bitmap); k++ { if (bc.bitmap[k] & value2.bitmap[k]) != 0 { return true } } return false } func (bc *bitmapContainer) iandBitmap(value2 *bitmapContainer) container { newcardinality := int(popcntAndSlice(bc.bitmap, value2.bitmap)) for k := 0; k < len(bc.bitmap); k++ { bc.bitmap[k] = bc.bitmap[k] & value2.bitmap[k] } bc.cardinality = newcardinality if newcardinality <= arrayDefaultMaxSize { return newArrayContainerFromBitmap(bc) } return bc } func (bc *bitmapContainer) andNot(a container) container { switch x := a.(type) { case *arrayContainer: return bc.andNotArray(x) case *bitmapContainer: return bc.andNotBitmap(x) case *runContainer16: return bc.andNotRun16(x) } panic("unsupported container type") } func (bc *bitmapContainer) andNotRun16(rc *runContainer16) container { rcb := rc.toBitmapContainer() return bc.andNotBitmap(rcb) } func (bc *bitmapContainer) iandNot(a container) container { switch x := a.(type) { case *arrayContainer: return bc.iandNotArray(x) case *bitmapContainer: return bc.iandNotBitmapSurely(x) case *runContainer16: return bc.iandNotRun16(x) } panic("unsupported container type") } func (bc *bitmapContainer) iandNotArray(ac *arrayContainer) container { acb := ac.toBitmapContainer() return bc.iandNotBitmapSurely(acb) } func (bc *bitmapContainer) iandNotRun16(rc *runContainer16) container { rcb := rc.toBitmapContainer() return bc.iandNotBitmapSurely(rcb) } func (bc 
*bitmapContainer) andNotArray(value2 *arrayContainer) container { answer := bc.clone().(*bitmapContainer) c := value2.getCardinality() for k := 0; k < c; k++ { vc := value2.content[k] i := uint(vc) >> 6 oldv := answer.bitmap[i] newv := oldv &^ (uint64(1) << (vc % 64)) answer.bitmap[i] = newv answer.cardinality -= int((oldv ^ newv) >> (vc % 64)) } if answer.cardinality <= arrayDefaultMaxSize { return answer.toArrayContainer() } return answer } func (bc *bitmapContainer) andNotBitmap(value2 *bitmapContainer) container { newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap)) if newCardinality > arrayDefaultMaxSize { answer := newBitmapContainer() for k := 0; k < len(answer.bitmap); k++ { answer.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k] } answer.cardinality = newCardinality return answer } ac := newArrayContainerSize(newCardinality) fillArrayANDNOT(ac.content, bc.bitmap, value2.bitmap) return ac } func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) container { newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap)) for k := 0; k < len(bc.bitmap); k++ { bc.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k] } bc.cardinality = newCardinality if bc.getCardinality() <= arrayDefaultMaxSize { return bc.toArrayContainer() } return bc } func (bc *bitmapContainer) contains(i uint16) bool { //testbit x := uint(i) w := bc.bitmap[x>>6] mask := uint64(1) << (x & 63) return (w & mask) != 0 } func (bc *bitmapContainer) bitValue(i uint16) uint64 { x := uint(i) w := bc.bitmap[x>>6] return (w >> (x & 63)) & 1 } func (bc *bitmapContainer) loadData(arrayContainer *arrayContainer) { bc.cardinality = arrayContainer.getCardinality() c := arrayContainer.getCardinality() for k := 0; k < c; k++ { x := arrayContainer.content[k] i := int(x) / 64 bc.bitmap[i] |= (uint64(1) << uint(x%64)) } } func (bc *bitmapContainer) toArrayContainer() *arrayContainer { ac := &arrayContainer{} ac.loadData(bc) return ac } func (bc *bitmapContainer) fillArray(container []uint16) { 
//TODO: rewrite in assembly pos := 0 base := 0 for k := 0; k < len(bc.bitmap); k++ { bitset := bc.bitmap[k] for bitset != 0 { t := bitset & -bitset container[pos] = uint16((base + int(popcount(t-1)))) pos = pos + 1 bitset ^= t } base += 64 } } func (bc *bitmapContainer) NextSetBit(i int) int { x := i / 64 if x >= len(bc.bitmap) { return -1 } w := bc.bitmap[x] w = w >> uint(i%64) if w != 0 { return i + countTrailingZeros(w) } x++ for ; x < len(bc.bitmap); x++ { if bc.bitmap[x] != 0 { return (x * 64) + countTrailingZeros(bc.bitmap[x]) } } return -1 } func (bc *bitmapContainer) PrevSetBit(i int) int { if i < 0 { return -1 } x := i / 64 if x >= len(bc.bitmap) { return -1 } w := bc.bitmap[x] b := i % 64 w = w << uint(63-b) if w != 0 { return i - countLeadingZeros(w) } x-- for ; x >= 0; x-- { if bc.bitmap[x] != 0 { return (x * 64) + 63 - countLeadingZeros(bc.bitmap[x]) } } return -1 } // reference the java implementation // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892 // func (bc *bitmapContainer) numberOfRuns() int { if bc.cardinality == 0 { return 0 } var numRuns uint64 nextWord := bc.bitmap[0] for i := 0; i < len(bc.bitmap)-1; i++ { word := nextWord nextWord = bc.bitmap[i+1] numRuns += popcount((^word)&(word<<1)) + ((word >> 63) &^ nextWord) } word := nextWord numRuns += popcount((^word) & (word << 1)) if (word & 0x8000000000000000) != 0 { numRuns++ } return int(numRuns) } // convert to run or array *if needed* func (bc *bitmapContainer) toEfficientContainer() container { numRuns := bc.numberOfRuns() sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns) sizeAsBitmapContainer := bitmapContainerSizeInBytes() card := bc.getCardinality() sizeAsArrayContainer := arrayContainerSizeInBytes(card) if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { return newRunContainer16FromBitmapContainer(bc) } if card <= arrayDefaultMaxSize { return bc.toArrayContainer() } 
return bc } func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer { if len(rc.iv) == 1 { return newBitmapContainerwithRange(int(rc.iv[0].start), int(rc.iv[0].last())) } bc := newBitmapContainer() for i := range rc.iv { setBitmapRange(bc.bitmap, int(rc.iv[i].start), int(rc.iv[i].last())+1) bc.cardinality += int(rc.iv[i].last()) + 1 - int(rc.iv[i].start) } //bc.computeCardinality() return bc } func (bc *bitmapContainer) containerType() contype { return bitmapContype } func (bc *bitmapContainer) addOffset(x uint16) []container { low := newBitmapContainer() high := newBitmapContainer() b := uint32(x) >> 6 i := uint32(x) % 64 end := uint32(1024) - b if i == 0 { copy(low.bitmap[b:], bc.bitmap[:end]) copy(high.bitmap[:b], bc.bitmap[end:]) } else { low.bitmap[b] = bc.bitmap[0] << i for k := uint32(1); k < end; k++ { newval := bc.bitmap[k] << i if newval == 0 { newval = bc.bitmap[k-1] >> (64 - i) } low.bitmap[b+k] = newval } for k := end; k < 1024; k++ { newval := bc.bitmap[k] << i if newval == 0 { newval = bc.bitmap[k-1] >> (64 - i) } high.bitmap[k-end] = newval } high.bitmap[b] = bc.bitmap[1023] >> (64 - i) } low.computeCardinality() high.computeCardinality() return []container{low, high} } roaring-0.4.21/bitmapcontainer_gen.go 0000664 0000000 0000000 00000021515 13542657257 0017622 0 ustar 00root root 0000000 0000000 package roaring // NOTE: THIS FILE WAS PRODUCED BY THE // MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) // DO NOT EDIT import "github.com/tinylib/msgp/msgp" // Deprecated: DecodeMsg implements msgp.Decodable func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field var zbzg uint32 zbzg, err = dc.ReadMapHeader() if err != nil { return } for zbzg > 0 { zbzg-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "cardinality": z.cardinality, err = dc.ReadInt() if err != nil { return } case "bitmap": var zbai uint32 zbai, err = dc.ReadArrayHeader() if err != nil { 
return } if cap(z.bitmap) >= int(zbai) { z.bitmap = (z.bitmap)[:zbai] } else { z.bitmap = make([]uint64, zbai) } for zxvk := range z.bitmap { z.bitmap[zxvk], err = dc.ReadUint64() if err != nil { return } } default: err = dc.Skip() if err != nil { return } } } return } // Deprecated: EncodeMsg implements msgp.Encodable func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 2 // write "cardinality" err = en.Append(0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79) if err != nil { return err } err = en.WriteInt(z.cardinality) if err != nil { return } // write "bitmap" err = en.Append(0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70) if err != nil { return err } err = en.WriteArrayHeader(uint32(len(z.bitmap))) if err != nil { return } for zxvk := range z.bitmap { err = en.WriteUint64(z.bitmap[zxvk]) if err != nil { return } } return } // Deprecated: MarshalMsg implements msgp.Marshaler func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 2 // string "cardinality" o = append(o, 0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79) o = msgp.AppendInt(o, z.cardinality) // string "bitmap" o = append(o, 0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70) o = msgp.AppendArrayHeader(o, uint32(len(z.bitmap))) for zxvk := range z.bitmap { o = msgp.AppendUint64(o, z.bitmap[zxvk]) } return } // Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field var zcmr uint32 zcmr, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zcmr > 0 { zcmr-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "cardinality": z.cardinality, bts, err = msgp.ReadIntBytes(bts) if err != nil { return } case "bitmap": var zajw uint32 zajw, bts, err = msgp.ReadArrayHeaderBytes(bts) if err != nil { return } 
if cap(z.bitmap) >= int(zajw) { z.bitmap = (z.bitmap)[:zajw] } else { z.bitmap = make([]uint64, zajw) } for zxvk := range z.bitmap { z.bitmap[zxvk], bts, err = msgp.ReadUint64Bytes(bts) if err != nil { return } } default: bts, err = msgp.Skip(bts) if err != nil { return } } } o = bts return } // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *bitmapContainer) Msgsize() (s int) { s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size)) return } // Deprecated: DecodeMsg implements msgp.Decodable func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field var zhct uint32 zhct, err = dc.ReadMapHeader() if err != nil { return } for zhct > 0 { zhct-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "ptr": if dc.IsNil() { err = dc.ReadNil() if err != nil { return } z.ptr = nil } else { if z.ptr == nil { z.ptr = new(bitmapContainer) } var zcua uint32 zcua, err = dc.ReadMapHeader() if err != nil { return } for zcua > 0 { zcua-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "cardinality": z.ptr.cardinality, err = dc.ReadInt() if err != nil { return } case "bitmap": var zxhx uint32 zxhx, err = dc.ReadArrayHeader() if err != nil { return } if cap(z.ptr.bitmap) >= int(zxhx) { z.ptr.bitmap = (z.ptr.bitmap)[:zxhx] } else { z.ptr.bitmap = make([]uint64, zxhx) } for zwht := range z.ptr.bitmap { z.ptr.bitmap[zwht], err = dc.ReadUint64() if err != nil { return } } default: err = dc.Skip() if err != nil { return } } } } case "i": z.i, err = dc.ReadInt() if err != nil { return } default: err = dc.Skip() if err != nil { return } } } return } // Deprecated: EncodeMsg implements msgp.Encodable func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 2 // write "ptr" err = en.Append(0x82, 0xa3, 0x70, 
0x74, 0x72) if err != nil { return err } if z.ptr == nil { err = en.WriteNil() if err != nil { return } } else { // map header, size 2 // write "cardinality" err = en.Append(0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79) if err != nil { return err } err = en.WriteInt(z.ptr.cardinality) if err != nil { return } // write "bitmap" err = en.Append(0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70) if err != nil { return err } err = en.WriteArrayHeader(uint32(len(z.ptr.bitmap))) if err != nil { return } for zwht := range z.ptr.bitmap { err = en.WriteUint64(z.ptr.bitmap[zwht]) if err != nil { return } } } // write "i" err = en.Append(0xa1, 0x69) if err != nil { return err } err = en.WriteInt(z.i) if err != nil { return } return } // Deprecated: MarshalMsg implements msgp.Marshaler func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 2 // string "ptr" o = append(o, 0x82, 0xa3, 0x70, 0x74, 0x72) if z.ptr == nil { o = msgp.AppendNil(o) } else { // map header, size 2 // string "cardinality" o = append(o, 0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79) o = msgp.AppendInt(o, z.ptr.cardinality) // string "bitmap" o = append(o, 0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70) o = msgp.AppendArrayHeader(o, uint32(len(z.ptr.bitmap))) for zwht := range z.ptr.bitmap { o = msgp.AppendUint64(o, z.ptr.bitmap[zwht]) } } // string "i" o = append(o, 0xa1, 0x69) o = msgp.AppendInt(o, z.i) return } // Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field var zlqf uint32 zlqf, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zlqf > 0 { zlqf-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "ptr": if msgp.IsNil(bts) { bts, err = msgp.ReadNilBytes(bts) if err != nil { return } 
z.ptr = nil } else { if z.ptr == nil { z.ptr = new(bitmapContainer) } var zdaf uint32 zdaf, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zdaf > 0 { zdaf-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "cardinality": z.ptr.cardinality, bts, err = msgp.ReadIntBytes(bts) if err != nil { return } case "bitmap": var zpks uint32 zpks, bts, err = msgp.ReadArrayHeaderBytes(bts) if err != nil { return } if cap(z.ptr.bitmap) >= int(zpks) { z.ptr.bitmap = (z.ptr.bitmap)[:zpks] } else { z.ptr.bitmap = make([]uint64, zpks) } for zwht := range z.ptr.bitmap { z.ptr.bitmap[zwht], bts, err = msgp.ReadUint64Bytes(bts) if err != nil { return } } default: bts, err = msgp.Skip(bts) if err != nil { return } } } } case "i": z.i, bts, err = msgp.ReadIntBytes(bts) if err != nil { return } default: bts, err = msgp.Skip(bts) if err != nil { return } } } o = bts return } // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *bitmapContainerShortIterator) Msgsize() (s int) { s = 1 + 4 if z.ptr == nil { s += msgp.NilSize } else { s += 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.ptr.bitmap) * (msgp.Uint64Size)) } s += 2 + msgp.IntSize return } roaring-0.4.21/bitmapcontainer_gen_test.go 0000664 0000000 0000000 00000011434 13542657257 0020660 0 ustar 00root root 0000000 0000000 package roaring // NOTE: THIS FILE WAS PRODUCED BY THE // MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) // DO NOT EDIT import ( "bytes" "testing" "github.com/tinylib/msgp/msgp" ) func TestMarshalUnmarshalbitmapContainer(t *testing.T) { v := bitmapContainer{} bts, err := v.MarshalMsg(nil) if err != nil { t.Fatal(err) } left, err := v.UnmarshalMsg(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) } left, err = msgp.Skip(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { 
t.Errorf("%d bytes left over after Skip(): %q", len(left), left) } } func BenchmarkMarshalMsgbitmapContainer(b *testing.B) { v := bitmapContainer{} b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.MarshalMsg(nil) } } func BenchmarkAppendMsgbitmapContainer(b *testing.B) { v := bitmapContainer{} bts := make([]byte, 0, v.Msgsize()) bts, _ = v.MarshalMsg(bts[0:0]) b.SetBytes(int64(len(bts))) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { bts, _ = v.MarshalMsg(bts[0:0]) } } func BenchmarkUnmarshalbitmapContainer(b *testing.B) { v := bitmapContainer{} bts, _ := v.MarshalMsg(nil) b.ReportAllocs() b.SetBytes(int64(len(bts))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := v.UnmarshalMsg(bts) if err != nil { b.Fatal(err) } } } func TestEncodeDecodebitmapContainer(t *testing.T) { v := bitmapContainer{} var buf bytes.Buffer msgp.Encode(&buf, &v) m := v.Msgsize() if buf.Len() > m { t.Logf("WARNING: Msgsize() for %v is inaccurate", v) } vn := bitmapContainer{} err := msgp.Decode(&buf, &vn) if err != nil { t.Error(err) } buf.Reset() msgp.Encode(&buf, &v) err = msgp.NewReader(&buf).Skip() if err != nil { t.Error(err) } } func BenchmarkEncodebitmapContainer(b *testing.B) { v := bitmapContainer{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) en := msgp.NewWriter(msgp.Nowhere) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.EncodeMsg(en) } en.Flush() } func BenchmarkDecodebitmapContainer(b *testing.B) { v := bitmapContainer{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) rd := msgp.NewEndlessReader(buf.Bytes(), b) dc := msgp.NewReader(rd) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { err := v.DecodeMsg(dc) if err != nil { b.Fatal(err) } } } func TestMarshalUnmarshalbitmapContainerShortIterator(t *testing.T) { v := bitmapContainerShortIterator{} bts, err := v.MarshalMsg(nil) if err != nil { t.Fatal(err) } left, err := v.UnmarshalMsg(bts) if err != nil { t.Fatal(err) } if 
len(left) > 0 { t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) } left, err = msgp.Skip(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after Skip(): %q", len(left), left) } } func BenchmarkMarshalMsgbitmapContainerShortIterator(b *testing.B) { v := bitmapContainerShortIterator{} b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.MarshalMsg(nil) } } func BenchmarkAppendMsgbitmapContainerShortIterator(b *testing.B) { v := bitmapContainerShortIterator{} bts := make([]byte, 0, v.Msgsize()) bts, _ = v.MarshalMsg(bts[0:0]) b.SetBytes(int64(len(bts))) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { bts, _ = v.MarshalMsg(bts[0:0]) } } func BenchmarkUnmarshalbitmapContainerShortIterator(b *testing.B) { v := bitmapContainerShortIterator{} bts, _ := v.MarshalMsg(nil) b.ReportAllocs() b.SetBytes(int64(len(bts))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := v.UnmarshalMsg(bts) if err != nil { b.Fatal(err) } } } func TestEncodeDecodebitmapContainerShortIterator(t *testing.T) { v := bitmapContainerShortIterator{} var buf bytes.Buffer msgp.Encode(&buf, &v) m := v.Msgsize() if buf.Len() > m { t.Logf("WARNING: Msgsize() for %v is inaccurate", v) } vn := bitmapContainerShortIterator{} err := msgp.Decode(&buf, &vn) if err != nil { t.Error(err) } buf.Reset() msgp.Encode(&buf, &v) err = msgp.NewReader(&buf).Skip() if err != nil { t.Error(err) } } func BenchmarkEncodebitmapContainerShortIterator(b *testing.B) { v := bitmapContainerShortIterator{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) en := msgp.NewWriter(msgp.Nowhere) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.EncodeMsg(en) } en.Flush() } func BenchmarkDecodebitmapContainerShortIterator(b *testing.B) { v := bitmapContainerShortIterator{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) rd := msgp.NewEndlessReader(buf.Bytes(), b) dc := msgp.NewReader(rd) b.ReportAllocs() 
b.ResetTimer() for i := 0; i < b.N; i++ { err := v.DecodeMsg(dc) if err != nil { b.Fatal(err) } } } roaring-0.4.21/bitmapcontainer_test.go 0000664 0000000 0000000 00000012516 13542657257 0020031 0 ustar 00root root 0000000 0000000 package roaring import ( "github.com/stretchr/testify/assert" "math/rand" "testing" ) // bitmapContainer's numberOfRuns() function should be correct against the runContainer equivalent func TestBitmapContainerNumberOfRuns024(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 1000, percentFill: .1, ntrial: 10}, } for _, tr := range trials { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) n := tr.n a := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true } // RunContainer compute this automatically rc := newRunContainer16FromVals(false, a...) rcNr := rc.numberOfRuns() // vs bitmapContainer bc := newBitmapContainer() for k := range ma { bc.iadd(uint16(k)) } bcNr := bc.numberOfRuns() assert.Equal(t, rcNr, bcNr) } } } // bitmap containers get cardinality in range, miss the last index, issue #183 func TestBitmapcontainerAndCardinality(t *testing.T) { for r := 0; r <= 65535; r++ { c1 := newRunContainer16Range(0, uint16(r)) c2 := newBitmapContainerwithRange(0, int(r)) assert.Equal(t, r+1, c1.andCardinality(c2)) } } func TestIssue181(t *testing.T) { t.Run("Initial issue 181", func(t *testing.T) { a := New() var x uint32 // adding 1M integers for i := 1; i <= 1000000; i++ { x += uint32(rand.Intn(10) + 1) a.Add(x) } b := New() for i := 1; i <= int(x); i++ { b.Add(uint32(i)) } assert.Equal(t, b.AndCardinality(a), a.AndCardinality(b)) assert.Equal(t, b.AndCardinality(a), And(a, b).GetCardinality()) }) t.Run("Second version of issue 181", func(t *testing.T) { a := New() var x uint32 // adding 1M integers for i := 1; i <= 1000000; i++ { x += uint32(rand.Intn(10) + 1) a.Add(x) } b := New() b.AddRange(1, uint64(x)) assert.Equal(t, 
b.AndCardinality(a), a.AndCardinality(b)) assert.Equal(t, b.AndCardinality(a), And(a, b).GetCardinality()) }) } // RunReverseIterator16 unit tests for cur, next, hasNext, and remove should pass func TestBitmapContainerReverseIterator(t *testing.T) { t.Run("reverse iterator on the empty container", func(t *testing.T) { bc := newBitmapContainer() it := bc.getReverseIterator() assert.False(t, it.hasNext()) assert.Panics(t, func() { it.next() }) }) t.Run("reverse iterator on the container with range(0,0)", func(t *testing.T) { bc := newBitmapContainerwithRange(0, 0) it := bc.getReverseIterator() assert.True(t, it.hasNext()) assert.EqualValues(t, 0, it.next()) }) t.Run("reverse iterator on the container with range(4,4)", func(t *testing.T) { bc := newBitmapContainerwithRange(4, 4) it := bc.getReverseIterator() assert.True(t, it.hasNext()) assert.EqualValues(t, 4, it.next()) }) t.Run("reverse iterator on the container with range(4,9)", func(t *testing.T) { bc := newBitmapContainerwithRange(4, 9) it := bc.getReverseIterator() assert.True(t, it.hasNext()) for i := 9; i >= 4; i-- { assert.EqualValues(t, i, it.next()) if i > 4 { assert.True(t, it.hasNext()) } else if i == 4 { assert.False(t, it.hasNext()) } } assert.False(t, it.hasNext()) assert.Panics(t, func() { it.next() }) }) t.Run("reverse iterator on the container with values", func(t *testing.T) { values := []uint16{0, 2, 15, 16, 31, 32, 33, 9999, MaxUint16} bc := newBitmapContainer() for n := 0; n < len(values); n++ { bc.iadd(values[n]) } it := bc.getReverseIterator() n := len(values) assert.True(t, it.hasNext()) for it.hasNext() { n-- assert.Equal(t, values[n], it.next()) } assert.Equal(t, 0, n) }) } func TestBitmapNextSet(t *testing.T) { testSize := 5000 bc := newBitmapContainer() for i := 0; i < testSize; i++ { bc.iadd(uint16(i)) } m := 0 for n := 0; m < testSize; n, m = bc.NextSetBit(n+1), m+1 { assert.Equal(t, m, n) } assert.Equal(t, 5000, m) } func TestBitmapPrevSet(t *testing.T) { testSize := 5000 bc := 
newBitmapContainer() for i := 0; i < testSize; i++ { bc.iadd(uint16(i)) } m := testSize - 1 for n := testSize - 1; n > 0; n, m = bc.PrevSetBit(n-1), m-1 { assert.Equal(t, m, n) } assert.Equal(t, 0, m) } func TestBitmapIteratorPeekNext(t *testing.T) { testContainerIteratorPeekNext(t, newBitmapContainer()) } func TestBitmapIteratorAdvance(t *testing.T) { testContainerIteratorAdvance(t, newBitmapContainer()) } // go test -bench BenchmarkShortIteratorAdvance -run - func BenchmarkShortIteratorAdvanceBitmap(b *testing.B) { benchmarkContainerIteratorAdvance(b, newBitmapContainer()) } // go test -bench BenchmarkShortIteratorNext -run - func BenchmarkShortIteratorNextBitmap(b *testing.B) { benchmarkContainerIteratorNext(b, newBitmapContainer()) } func TestBitmapOffset(t *testing.T) { nums := []uint16{10, 100, 1000} expected := make([]int, len(nums)) offtest := uint16(65000) v := container(newBitmapContainer()) for i, n := range nums { v.iadd(n) expected[i] = int(n) + int(offtest) } w := v.addOffset(offtest) w0card := w[0].getCardinality() w1card := w[1].getCardinality() assert.Equal(t, 3, w0card+w1card) wout := make([]int, len(nums)) for i := 0; i < w0card; i++ { wout[i] = w[0].selectInt(uint16(i)) } for i := 0; i < w1card; i++ { wout[i+w0card] = w[1].selectInt(uint16(i)) + 65536 } for i, x := range wout { assert.Equal(t, expected[i], x) } } roaring-0.4.21/byte_input.go 0000664 0000000 0000000 00000006271 13542657257 0015776 0 ustar 00root root 0000000 0000000 package roaring import ( "encoding/binary" "io" ) type byteInput interface { // next returns a slice containing the next n bytes from the buffer, // advancing the buffer as if the bytes had been returned by Read. 
next(n int) ([]byte, error) // readUInt32 reads uint32 with LittleEndian order readUInt32() (uint32, error) // readUInt16 reads uint16 with LittleEndian order readUInt16() (uint16, error) // getReadBytes returns read bytes getReadBytes() int64 // skipBytes skips exactly n bytes skipBytes(n int) error } func newByteInputFromReader(reader io.Reader) byteInput { return &byteInputAdapter{ r: reader, readBytes: 0, } } func newByteInput(buf []byte) byteInput { return &byteBuffer{ buf: buf, off: 0, } } type byteBuffer struct { buf []byte off int } // next returns a slice containing the next n bytes from the reader // If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned func (b *byteBuffer) next(n int) ([]byte, error) { m := len(b.buf) - b.off if n > m { return nil, io.ErrUnexpectedEOF } data := b.buf[b.off : b.off+n] b.off += n return data, nil } // readUInt32 reads uint32 with LittleEndian order func (b *byteBuffer) readUInt32() (uint32, error) { if len(b.buf)-b.off < 4 { return 0, io.ErrUnexpectedEOF } v := binary.LittleEndian.Uint32(b.buf[b.off:]) b.off += 4 return v, nil } // readUInt16 reads uint16 with LittleEndian order func (b *byteBuffer) readUInt16() (uint16, error) { if len(b.buf)-b.off < 2 { return 0, io.ErrUnexpectedEOF } v := binary.LittleEndian.Uint16(b.buf[b.off:]) b.off += 2 return v, nil } // getReadBytes returns read bytes func (b *byteBuffer) getReadBytes() int64 { return int64(b.off) } // skipBytes skips exactly n bytes func (b *byteBuffer) skipBytes(n int) error { m := len(b.buf) - b.off if n > m { return io.ErrUnexpectedEOF } b.off += n return nil } // reset resets the given buffer with a new byte slice func (b *byteBuffer) reset(buf []byte) { b.buf = buf b.off = 0 } type byteInputAdapter struct { r io.Reader readBytes int } // next returns a slice containing the next n bytes from the buffer, // advancing the buffer as if the bytes had been returned by Read. 
func (b *byteInputAdapter) next(n int) ([]byte, error) { buf := make([]byte, n) m, err := io.ReadAtLeast(b.r, buf, n) b.readBytes += m if err != nil { return nil, err } return buf, nil } // readUInt32 reads uint32 with LittleEndian order func (b *byteInputAdapter) readUInt32() (uint32, error) { buf, err := b.next(4) if err != nil { return 0, err } return binary.LittleEndian.Uint32(buf), nil } // readUInt16 reads uint16 with LittleEndian order func (b *byteInputAdapter) readUInt16() (uint16, error) { buf, err := b.next(2) if err != nil { return 0, err } return binary.LittleEndian.Uint16(buf), nil } // getReadBytes returns read bytes func (b *byteInputAdapter) getReadBytes() int64 { return int64(b.readBytes) } // skipBytes skips exactly n bytes func (b *byteInputAdapter) skipBytes(n int) error { _, err := b.next(n) return err } // reset resets the given buffer with a new stream func (b *byteInputAdapter) reset(stream io.Reader) { b.r = stream b.readBytes = 0 } roaring-0.4.21/byte_input_test.go 0000664 0000000 0000000 00000002534 13542657257 0017033 0 ustar 00root root 0000000 0000000 package roaring import ( "bytes" "github.com/stretchr/testify/assert" "testing" ) func TestByteInputFlow(t *testing.T) { t.Run("Test should be an error on empty data", func(t *testing.T) { buf := bytes.NewBuffer([]byte{}) instances := []byteInput{ newByteInput(buf.Bytes()), newByteInputFromReader(buf), } for _, input := range instances { n, err := input.readUInt16() assert.EqualValues(t, 0, n) assert.Error(t, err) p, err := input.readUInt32() assert.EqualValues(t, 0, p) assert.Error(t, err) b, err := input.next(10) assert.Nil(t, b) assert.Error(t, err) err = input.skipBytes(10) assert.Error(t, err) } }) t.Run("Test on nonempty data", func(t *testing.T) { buf := bytes.NewBuffer(uint16SliceAsByteSlice([]uint16{1, 10, 32, 66, 23})) instances := []byteInput{ newByteInput(buf.Bytes()), newByteInputFromReader(buf), } for _, input := range instances { n, err := input.readUInt16() 
assert.EqualValues(t, 1, n) assert.NoError(t, err) p, err := input.readUInt32() assert.EqualValues(t, 2097162, p) // 32 << 16 | 10 assert.NoError(t, err) b, err := input.next(2) assert.EqualValues(t, []byte{66, 0}, b) assert.NoError(t, err) err = input.skipBytes(2) assert.NoError(t, err) b, err = input.next(1) assert.Nil(t, b) assert.Error(t, err) } }) } roaring-0.4.21/clz.go 0000664 0000000 0000000 00000000343 13542657257 0014376 0 ustar 00root root 0000000 0000000 // +build go1.9 // "go1.9", from Go version 1.9 onward // See https://golang.org/pkg/go/build/#hdr-Build_Constraints package roaring import "math/bits" func countLeadingZeros(x uint64) int { return bits.LeadingZeros64(x) } roaring-0.4.21/clz_compat.go 0000664 0000000 0000000 00000000715 13542657257 0015744 0 ustar 00root root 0000000 0000000 // +build !go1.9 package roaring // LeadingZeroBits returns the number of consecutive most significant zero // bits of x. func countLeadingZeros(i uint64) int { if i == 0 { return 64 } n := 1 x := uint32(i >> 32) if x == 0 { n += 32 x = uint32(i) } if (x >> 16) == 0 { n += 16 x <<= 16 } if (x >> 24) == 0 { n += 8 x <<= 8 } if x>>28 == 0 { n += 4 x <<= 4 } if x>>30 == 0 { n += 2 x <<= 2 } n -= int(x >> 31) return n } roaring-0.4.21/clz_test.go 0000664 0000000 0000000 00000001551 13542657257 0015437 0 ustar 00root root 0000000 0000000 package roaring import ( "github.com/stretchr/testify/assert" "testing" ) func numberOfLeadingZeros(i uint64) int { if i == 0 { return 64 } n := 1 x := uint32(i >> 32) if x == 0 { n += 32 x = uint32(i) } if (x >> 16) == 0 { n += 16 x <<= 16 } if (x >> 24) == 0 { n += 8 x <<= 8 } if x>>28 == 0 { n += 4 x <<= 4 } if x>>30 == 0 { n += 2 x <<= 2 } n -= int(x >> 31) return n } func TestCountLeadingZeros072(t *testing.T) { assert.Equal(t, 64, numberOfLeadingZeros(0)) assert.Equal(t, 60, numberOfLeadingZeros(8)) assert.Equal(t, 64-17-1, numberOfLeadingZeros(1<<17)) assert.Equal(t, 0, numberOfLeadingZeros(0xFFFFFFFFFFFFFFFF)) assert.Equal(t, 64, 
countLeadingZeros(0)) assert.Equal(t, 60, countLeadingZeros(8)) assert.Equal(t, 64-17-1, countLeadingZeros(1<<17)) assert.Equal(t, 0, countLeadingZeros(0xFFFFFFFFFFFFFFFF)) } roaring-0.4.21/container_test.go 0000664 0000000 0000000 00000013016 13542657257 0016630 0 ustar 00root root 0000000 0000000 package roaring import ( "fmt" "github.com/stretchr/testify/assert" "log" "testing" ) func makeContainer(ss []uint16) container { c := newArrayContainer() for _, s := range ss { c.iadd(s) } return c } func checkContent(c container, s []uint16) bool { si := c.getShortIterator() ctr := 0 fail := false for si.hasNext() { if ctr == len(s) { log.Println("HERE") fail = true break } i := si.next() if i != s[ctr] { log.Println("THERE", i, s[ctr]) fail = true break } ctr++ } if ctr != len(s) { log.Println("LAST") fail = true } if fail { log.Println("fail, found ") si = c.getShortIterator() z := 0 for si.hasNext() { si.next() z++ } log.Println(z, len(s)) } return !fail } func testContainerIteratorPeekNext(t *testing.T, c container) { testSize := 5000 for i := 0; i < testSize; i++ { c.iadd(uint16(i)) } i := c.getShortIterator() assert.True(t, i.hasNext()) for i.hasNext() { assert.Equal(t, i.peekNext(), i.next()) testSize-- } assert.Equal(t, 0, testSize) } func testContainerIteratorAdvance(t *testing.T, con container) { values := []uint16{1, 2, 15, 16, 31, 32, 33, 9999} for _, v := range values { con.iadd(v) } cases := []struct { minval uint16 expected uint16 }{ {0, 1}, {1, 1}, {2, 2}, {3, 15}, {15, 15}, {30, 31}, {31, 31}, {33, 33}, {34, 9999}, {9998, 9999}, {9999, 9999}, } t.Run("advance by using a new short iterator", func(t *testing.T) { for _, c := range cases { i := con.getShortIterator() i.advanceIfNeeded(c.minval) assert.True(t, i.hasNext()) assert.Equal(t, c.expected, i.peekNext()) } }) t.Run("advance by using the same short iterator", func(t *testing.T) { i := con.getShortIterator() for _, c := range cases { i.advanceIfNeeded(c.minval) assert.True(t, i.hasNext()) 
assert.Equal(t, c.expected, i.peekNext()) } }) t.Run("advance out of a container value", func(t *testing.T) { i := con.getShortIterator() i.advanceIfNeeded(33) assert.True(t, i.hasNext()) assert.EqualValues(t, 33, i.peekNext()) i.advanceIfNeeded(MaxUint16 - 1) assert.False(t, i.hasNext()) i.advanceIfNeeded(MaxUint16) assert.False(t, i.hasNext()) }) t.Run("advance on a value that is less than the pointed value", func(t *testing.T) { i := con.getShortIterator() i.advanceIfNeeded(29) assert.True(t, i.hasNext()) assert.EqualValues(t, 31, i.peekNext()) i.advanceIfNeeded(13) assert.True(t, i.hasNext()) assert.EqualValues(t, 31, i.peekNext()) }) } func benchmarkContainerIteratorAdvance(b *testing.B, con container) { for _, initsize := range []int{1, 650, 6500, MaxUint16} { for i := 0; i < initsize; i++ { con.iadd(uint16(i)) } b.Run(fmt.Sprintf("init size %d shortIterator advance", initsize), func(b *testing.B) { b.StartTimer() diff := uint16(0) for n := 0; n < b.N; n++ { val := uint16(n % initsize) i := con.getShortIterator() i.advanceIfNeeded(val) diff += i.peekNext() - val } b.StopTimer() if diff != 0 { b.Fatalf("Expected diff 0, got %d", diff) } }) } } func benchmarkContainerIteratorNext(b *testing.B, con container) { for _, initsize := range []int{1, 650, 6500, MaxUint16} { for i := 0; i < initsize; i++ { con.iadd(uint16(i)) } b.Run(fmt.Sprintf("init size %d shortIterator next", initsize), func(b *testing.B) { b.StartTimer() diff := 0 for n := 0; n < b.N; n++ { i := con.getShortIterator() j := 0 for i.hasNext() { i.next() j++ } diff += j - initsize } b.StopTimer() if diff != 0 { b.Fatalf("Expected diff 0, got %d", diff) } }) } } func TestContainerReverseIterator(t *testing.T) { content := []uint16{1, 3, 5, 7, 9} c := makeContainer(content) si := c.getReverseIterator() i := 4 for si.hasNext() { assert.Equal(t, content[i], si.next()) i-- } assert.Equal(t, -1, i) } func TestRoaringContainer(t *testing.T) { t.Run("countTrailingZeros", func(t *testing.T) { x := uint64(0) o 
:= countTrailingZeros(x) assert.Equal(t, 64, o) x = 1 << 3 o = countTrailingZeros(x) assert.Equal(t, 3, o) }) t.Run("ArrayShortIterator", func(t *testing.T) { content := []uint16{1, 3, 5, 7, 9} c := makeContainer(content) si := c.getShortIterator() i := 0 for si.hasNext() { si.next() i++ } assert.Equal(t, 5, i) }) t.Run("BinarySearch", func(t *testing.T) { content := []uint16{1, 3, 5, 7, 9} res := binarySearch(content, 5) assert.Equal(t, 2, res) res = binarySearch(content, 4) assert.Less(t, res, 0) }) t.Run("bitmapcontainer", func(t *testing.T) { content := []uint16{1, 3, 5, 7, 9} a := newArrayContainer() b := newBitmapContainer() for _, v := range content { a.iadd(v) b.iadd(v) } c := a.toBitmapContainer() assert.Equal(t, b.getCardinality(), a.getCardinality()) assert.Equal(t, b.getCardinality(), c.getCardinality()) }) t.Run("inottest0", func(t *testing.T) { content := []uint16{9} c := makeContainer(content) c = c.inot(0, 11) si := c.getShortIterator() i := 0 for si.hasNext() { si.next() i++ } assert.Equal(t, 10, i) }) t.Run("inotTest1", func(t *testing.T) { // Array container, range is complete content := []uint16{1, 3, 5, 7, 9} //content := []uint16{1} edge := 1 << 13 c := makeContainer(content) c = c.inot(0, edge+1) size := edge - len(content) s := make([]uint16, size+1) pos := 0 for i := uint16(0); i < uint16(edge+1); i++ { if binarySearch(content, i) < 0 { s[pos] = i pos++ } } assert.True(t, checkContent(c, s)) }) } roaring-0.4.21/ctz.go 0000664 0000000 0000000 00000000345 13542657257 0014410 0 ustar 00root root 0000000 0000000 // +build go1.9 // "go1.9", from Go version 1.9 onward // See https://golang.org/pkg/go/build/#hdr-Build_Constraints package roaring import "math/bits" func countTrailingZeros(x uint64) int { return bits.TrailingZeros64(x) } roaring-0.4.21/ctz_compat.go 0000664 0000000 0000000 00000005637 13542657257 0015764 0 ustar 00root root 0000000 0000000 // +build !go1.9 package roaring // Reuse of portions of go/src/math/big standard lib code // 
under this license: /* Copyright (c) 2009 The Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ const deBruijn32 = 0x077CB531 var deBruijn32Lookup = []byte{ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9, } const deBruijn64 = 0x03f79d71b4ca8b09 var deBruijn64Lookup = []byte{ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, } // trailingZeroBits returns the number of consecutive least significant zero // bits of x. func countTrailingZeros(x uint64) int { // x & -x leaves only the right-most bit set in the word. Let k be the // index of that bit. Since only a single bit is set, the value is two // to the power of k. Multiplying by a power of two is equivalent to // left shifting, in this case by k bits. The de Bruijn constant is // such that all six bit, consecutive substrings are distinct. // Therefore, if we have a left shifted version of this constant we can // find by how many bits it was shifted by looking at which six bit // substring ended up at the top of the word. // (Knuth, volume 4, section 7.3.1) if x == 0 { // We have to special case 0; the fomula // below doesn't work for 0. 
return 64 } return int(deBruijn64Lookup[((x&-x)*(deBruijn64))>>58]) } roaring-0.4.21/ctz_test.go 0000664 0000000 0000000 00000003676 13542657257 0015461 0 ustar 00root root 0000000 0000000 package roaring import ( "encoding/binary" "github.com/stretchr/testify/assert" "math/rand" "testing" ) func TestCountTrailingZeros072(t *testing.T) { assert.Equal(t, 64, numberOfTrailingZeros(0)) assert.Equal(t, 3, numberOfTrailingZeros(8)) assert.Equal(t, 0, numberOfTrailingZeros(7)) assert.Equal(t, 17, numberOfTrailingZeros(1<<17)) assert.Equal(t, 17, numberOfTrailingZeros(7<<17)) assert.Equal(t, 33, numberOfTrailingZeros(255<<33)) assert.Equal(t, 64, countTrailingZeros(0)) assert.Equal(t, 3, countTrailingZeros(8)) assert.Equal(t, 0, countTrailingZeros(7)) assert.Equal(t, 17, countTrailingZeros(1<<17)) assert.Equal(t, 17, countTrailingZeros(7<<17)) assert.Equal(t, 33, countTrailingZeros(255<<33)) } func getRandomUint64Set(n int) []uint64 { seed := int64(42) rand.Seed(seed) var buf [8]byte var o []uint64 for i := 0; i < n; i++ { rand.Read(buf[:]) o = append(o, binary.LittleEndian.Uint64(buf[:])) } return o } func getAllOneBitUint64Set() []uint64 { var o []uint64 for i := uint(0); i < 64; i++ { o = append(o, 1<>63)) } roaring-0.4.21/example_roaring_test.go 0000664 0000000 0000000 00000005361 13542657257 0020026 0 ustar 00root root 0000000 0000000 package roaring import ( "bytes" "fmt" "testing" ) // Example_roaring demonstrates how to use the roaring library. func TestExample_roaring060(t *testing.T) { // example inspired by https://github.com/fzandona/goroar fmt.Println("==roaring==") rb1 := BitmapOf(1, 2, 3, 4, 5, 100, 1000) fmt.Println(rb1.String()) rb2 := BitmapOf(3, 4, 1000) fmt.Println(rb2.String()) rb3 := New() fmt.Println(rb3.String()) fmt.Println("Cardinality: ", rb1.GetCardinality()) if rb1.GetCardinality() != 7 { t.Errorf("Bad cardinality: %v", rb1.GetCardinality()) } fmt.Println("Contains 3? 
", rb1.Contains(3)) if !rb1.Contains(3) { t.Errorf("Should contain 3.") } rb1.And(rb2) rb3.Add(1) rb3.Add(5) rb3.Or(rb1) // prints 1, 3, 4, 5, 1000 i := rb3.Iterator() for i.HasNext() { fmt.Println(i.Next()) } fmt.Println() // next we include an example of serialization buf := new(bytes.Buffer) size, err := rb1.WriteTo(buf) if err != nil { fmt.Println("Failed writing") t.Errorf("Failed writing") } else { fmt.Println("Wrote ", size, " bytes") } newrb := New() _, err = newrb.ReadFrom(buf) if err != nil { fmt.Println("Failed reading") t.Errorf("Failed reading") } if !rb1.Equals(newrb) { fmt.Println("I did not get back to original bitmap?") t.Errorf("Bad serialization") } else { fmt.Println("I wrote the content to a byte stream and read it back.") } } // Example_roaring demonstrates how to use the roaring library with run containers. func TestExample2_roaring061(t *testing.T) { r1 := New() for i := uint32(100); i < 1000; i++ { r1.Add(i) } if !r1.Contains(500) { t.Errorf("should contain 500") } rb2 := r1.Clone() // compute how many bits there are: cardinality := r1.GetCardinality() // if your bitmaps have long runs, you can compress them by calling // run_optimize size := r1.GetSizeInBytes() r1.RunOptimize() if cardinality != r1.GetCardinality() { t.Errorf("RunOptimize should not change cardinality.") } compactSize := r1.GetSizeInBytes() if compactSize >= size { t.Errorf("Run optimized size should be smaller.") } if !r1.Equals(rb2) { t.Errorf("RunOptimize should not affect equality.") } fmt.Print("size before run optimize: ", size, " bytes, and after: ", compactSize, " bytes.\n") rb3 := New() rb3.AddRange(1, 10000000) r1.Or(rb3) if !r1.Equals(rb3) { t.Errorf("union with large run should give back contained set") } rb1 := r1.Clone() rb1.AndNot(rb3) if !rb1.IsEmpty() { t.Errorf("And not with large should clear...") } for i := uint32(0); i < 10000; i += 3 { rb1.Add(i) } rb1.AndNot(rb3) rb1card := rb1.GetCardinality() if rb1card != 1 { //rb1.RunOptimize() //fmt.Printf("\n 
rb1 = %s\n", rb1) t.Errorf("Only the value 0 should survive the andNot; rb1card = %v", rb1card) } } roaring-0.4.21/fastaggregation.go 0000664 0000000 0000000 00000013541 13542657257 0016757 0 ustar 00root root 0000000 0000000 package roaring import ( "container/heap" ) // Or function that requires repairAfterLazy func lazyOR(x1, x2 *Bitmap) *Bitmap { answer := NewBitmap() pos1 := 0 pos2 := 0 length1 := x1.highlowcontainer.size() length2 := x2.highlowcontainer.size() main: for (pos1 < length1) && (pos2 < length2) { s1 := x1.highlowcontainer.getKeyAtIndex(pos1) s2 := x2.highlowcontainer.getKeyAtIndex(pos2) for { if s1 < s2 { answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1) pos1++ if pos1 == length1 { break main } s1 = x1.highlowcontainer.getKeyAtIndex(pos1) } else if s1 > s2 { answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2) pos2++ if pos2 == length2 { break main } s2 = x2.highlowcontainer.getKeyAtIndex(pos2) } else { c1 := x1.highlowcontainer.getContainerAtIndex(pos1) switch t := c1.(type) { case *arrayContainer: c1 = t.toBitmapContainer() case *runContainer16: if !t.isFull() { c1 = t.toBitmapContainer() } } answer.highlowcontainer.appendContainer(s1, c1.lazyOR(x2.highlowcontainer.getContainerAtIndex(pos2)), false) pos1++ pos2++ if (pos1 == length1) || (pos2 == length2) { break main } s1 = x1.highlowcontainer.getKeyAtIndex(pos1) s2 = x2.highlowcontainer.getKeyAtIndex(pos2) } } } if pos1 == length1 { answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) } else if pos2 == length2 { answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1) } return answer } // In-place Or function that requires repairAfterLazy func (x1 *Bitmap) lazyOR(x2 *Bitmap) *Bitmap { pos1 := 0 pos2 := 0 length1 := x1.highlowcontainer.size() length2 := x2.highlowcontainer.size() main: for (pos1 < length1) && (pos2 < length2) { s1 := x1.highlowcontainer.getKeyAtIndex(pos1) s2 := x2.highlowcontainer.getKeyAtIndex(pos2) for { if s1 < s2 
{ pos1++ if pos1 == length1 { break main } s1 = x1.highlowcontainer.getKeyAtIndex(pos1) } else if s1 > s2 { x1.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone()) pos2++ pos1++ length1++ if pos2 == length2 { break main } s2 = x2.highlowcontainer.getKeyAtIndex(pos2) } else { c1 := x1.highlowcontainer.getContainerAtIndex(pos1) switch t := c1.(type) { case *arrayContainer: c1 = t.toBitmapContainer() case *runContainer16: if !t.isFull() { c1 = t.toBitmapContainer() } case *bitmapContainer: c1 = x1.highlowcontainer.getWritableContainerAtIndex(pos1) } x1.highlowcontainer.containers[pos1] = c1.lazyIOR(x2.highlowcontainer.getContainerAtIndex(pos2)) x1.highlowcontainer.needCopyOnWrite[pos1] = false pos1++ pos2++ if (pos1 == length1) || (pos2 == length2) { break main } s1 = x1.highlowcontainer.getKeyAtIndex(pos1) s2 = x2.highlowcontainer.getKeyAtIndex(pos2) } } } if pos1 == length1 { x1.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) } return x1 } // to be called after lazy aggregates func (x1 *Bitmap) repairAfterLazy() { for pos := 0; pos < x1.highlowcontainer.size(); pos++ { c := x1.highlowcontainer.getContainerAtIndex(pos) switch c.(type) { case *bitmapContainer: if c.(*bitmapContainer).cardinality == invalidCardinality { c = x1.highlowcontainer.getWritableContainerAtIndex(pos) c.(*bitmapContainer).computeCardinality() if c.(*bitmapContainer).getCardinality() <= arrayDefaultMaxSize { x1.highlowcontainer.setContainerAtIndex(pos, c.(*bitmapContainer).toArrayContainer()) } else if c.(*bitmapContainer).isFull() { x1.highlowcontainer.setContainerAtIndex(pos, newRunContainer16Range(0, MaxUint16)) } } } } } // FastAnd computes the intersection between many bitmaps quickly // Compared to the And function, it can take many bitmaps as input, thus saving the trouble // of manually calling "And" many times. 
func FastAnd(bitmaps ...*Bitmap) *Bitmap { if len(bitmaps) == 0 { return NewBitmap() } else if len(bitmaps) == 1 { return bitmaps[0].Clone() } answer := And(bitmaps[0], bitmaps[1]) for _, bm := range bitmaps[2:] { answer.And(bm) } return answer } // FastOr computes the union between many bitmaps quickly, as opposed to having to call Or repeatedly. // It might also be faster than calling Or repeatedly. func FastOr(bitmaps ...*Bitmap) *Bitmap { if len(bitmaps) == 0 { return NewBitmap() } else if len(bitmaps) == 1 { return bitmaps[0].Clone() } answer := lazyOR(bitmaps[0], bitmaps[1]) for _, bm := range bitmaps[2:] { answer = answer.lazyOR(bm) } // here is where repairAfterLazy is called. answer.repairAfterLazy() return answer } // HeapOr computes the union between many bitmaps quickly using a heap. // It might be faster than calling Or repeatedly. func HeapOr(bitmaps ...*Bitmap) *Bitmap { if len(bitmaps) == 0 { return NewBitmap() } // TODO: for better speed, we could do the operation lazily, see Java implementation pq := make(priorityQueue, len(bitmaps)) for i, bm := range bitmaps { pq[i] = &item{bm, i} } heap.Init(&pq) for pq.Len() > 1 { x1 := heap.Pop(&pq).(*item) x2 := heap.Pop(&pq).(*item) heap.Push(&pq, &item{Or(x1.value, x2.value), 0}) } return heap.Pop(&pq).(*item).value } // HeapXor computes the symmetric difference between many bitmaps quickly (as opposed to calling Xor repeated). // Internally, this function uses a heap. // It might be faster than calling Xor repeatedly. 
func HeapXor(bitmaps ...*Bitmap) *Bitmap { if len(bitmaps) == 0 { return NewBitmap() } pq := make(priorityQueue, len(bitmaps)) for i, bm := range bitmaps { pq[i] = &item{bm, i} } heap.Init(&pq) for pq.Len() > 1 { x1 := heap.Pop(&pq).(*item) x2 := heap.Pop(&pq).(*item) heap.Push(&pq, &item{Xor(x1.value, x2.value), 0}) } return heap.Pop(&pq).(*item).value } roaring-0.4.21/fastaggregation_test.go 0000664 0000000 0000000 00000011220 13542657257 0020006 0 ustar 00root root 0000000 0000000 package roaring // to run just these tests: go test -run TestFastAggregations* import ( "container/heap" "github.com/stretchr/testify/assert" "testing" ) func TestFastAggregationsSize(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb3 := NewBitmap() for i := uint32(0); i < 1000000; i += 3 { rb1.Add(i) } for i := uint32(0); i < 1000000; i += 7 { rb2.Add(i) } for i := uint32(0); i < 1000000; i += 1001 { rb3.Add(i) } pq := make(priorityQueue, 3) pq[0] = &item{rb1, 0} pq[1] = &item{rb2, 1} pq[2] = &item{rb3, 2} heap.Init(&pq) assert.Equal(t, rb3.GetSizeInBytes(), heap.Pop(&pq).(*item).value.GetSizeInBytes()) assert.Equal(t, rb2.GetSizeInBytes(), heap.Pop(&pq).(*item).value.GetSizeInBytes()) assert.Equal(t, rb1.GetSizeInBytes(), heap.Pop(&pq).(*item).value.GetSizeInBytes()) } func TestFastAggregationsCont(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb3 := NewBitmap() for i := uint32(0); i < 10; i += 3 { rb1.Add(i) } for i := uint32(0); i < 10; i += 7 { rb2.Add(i) } for i := uint32(0); i < 10; i += 1001 { rb3.Add(i) } for i := uint32(1000000); i < 1000000+10; i += 1001 { rb1.Add(i) } for i := uint32(1000000); i < 1000000+10; i += 7 { rb2.Add(i) } for i := uint32(1000000); i < 1000000+10; i += 3 { rb3.Add(i) } rb1.Add(500000) pq := make(containerPriorityQueue, 3) pq[0] = &containeritem{rb1, 0, 0} pq[1] = &containeritem{rb2, 0, 1} pq[2] = &containeritem{rb3, 0, 2} heap.Init(&pq) expected := []int{6, 4, 5, 6, 5, 4, 6} counter := 0 for pq.Len() > 0 { x1 := 
heap.Pop(&pq).(*containeritem) assert.EqualValues(t, expected[counter], x1.value.GetCardinality()) counter++ x1.keyindex++ if x1.keyindex < x1.value.highlowcontainer.size() { heap.Push(&pq, x1) } } } func TestFastAggregationsAdvanced_run(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb3 := NewBitmap() for i := uint32(500); i < 75000; i++ { rb1.Add(i) } for i := uint32(0); i < 1000000; i += 7 { rb2.Add(i) } for i := uint32(0); i < 1000000; i += 1001 { rb3.Add(i) } for i := uint32(1000000); i < 2000000; i += 1001 { rb1.Add(i) } for i := uint32(1000000); i < 2000000; i += 3 { rb2.Add(i) } for i := uint32(1000000); i < 2000000; i += 7 { rb3.Add(i) } rb1.RunOptimize() rb1.Or(rb2) rb1.Or(rb3) bigand := And(And(rb1, rb2), rb3) bigxor := Xor(Xor(rb1, rb2), rb3) assert.True(t, FastOr(rb1, rb2, rb3).Equals(rb1)) assert.True(t, HeapOr(rb1, rb2, rb3).Equals(rb1)) assert.Equal(t, rb1.GetCardinality(), HeapOr(rb1, rb2, rb3).GetCardinality()) assert.True(t, HeapXor(rb1, rb2, rb3).Equals(bigxor)) assert.True(t, FastAnd(rb1, rb2, rb3).Equals(bigand)) } func TestFastAggregationsXOR(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb3 := NewBitmap() for i := uint32(0); i < 40000; i++ { rb1.Add(i) } for i := uint32(0); i < 40000; i += 4000 { rb2.Add(i) } for i := uint32(0); i < 40000; i += 5000 { rb3.Add(i) } assert.EqualValues(t, 40000, rb1.GetCardinality()) xor1 := Xor(rb1, rb2) xor1alt := Xor(rb2, rb1) assert.True(t, xor1alt.Equals(xor1)) assert.True(t, HeapXor(rb1, rb2).Equals(xor1)) xor2 := Xor(rb2, rb3) xor2alt := Xor(rb3, rb2) assert.True(t, xor2alt.Equals(xor2)) assert.True(t, HeapXor(rb2, rb3).Equals(xor2)) bigxor := Xor(Xor(rb1, rb2), rb3) bigxoralt1 := Xor(rb1, Xor(rb2, rb3)) bigxoralt2 := Xor(rb1, Xor(rb3, rb2)) bigxoralt3 := Xor(rb3, Xor(rb1, rb2)) bigxoralt4 := Xor(Xor(rb1, rb2), rb3) assert.True(t, bigxoralt2.Equals(bigxor)) assert.True(t, bigxoralt1.Equals(bigxor)) assert.True(t, bigxoralt3.Equals(bigxor)) assert.True(t, bigxoralt4.Equals(bigxor)) 
assert.True(t, HeapXor(rb1, rb2, rb3).Equals(bigxor)) } func TestFastAggregationsXOR_run(t *testing.T) { rb1 := NewBitmap() rb2 := NewBitmap() rb3 := NewBitmap() for i := uint32(0); i < 40000; i++ { rb1.Add(i) } rb1.RunOptimize() for i := uint32(0); i < 40000; i += 4000 { rb2.Add(i) } for i := uint32(0); i < 40000; i += 5000 { rb3.Add(i) } assert.EqualValues(t, 40000, rb1.GetCardinality()) xor1 := Xor(rb1, rb2) xor1alt := Xor(rb2, rb1) assert.True(t, xor1alt.Equals(xor1)) assert.True(t, HeapXor(rb1, rb2).Equals(xor1)) xor2 := Xor(rb2, rb3) xor2alt := Xor(rb3, rb2) assert.True(t, xor2alt.Equals(xor2)) assert.True(t, HeapXor(rb2, rb3).Equals(xor2)) bigxor := Xor(Xor(rb1, rb2), rb3) bigxoralt1 := Xor(rb1, Xor(rb2, rb3)) bigxoralt2 := Xor(rb1, Xor(rb3, rb2)) bigxoralt3 := Xor(rb3, Xor(rb1, rb2)) bigxoralt4 := Xor(Xor(rb1, rb2), rb3) assert.True(t, bigxoralt2.Equals(bigxor)) assert.True(t, bigxoralt1.Equals(bigxor)) assert.True(t, bigxoralt3.Equals(bigxor)) assert.True(t, bigxoralt4.Equals(bigxor)) assert.True(t, HeapXor(rb1, rb2, rb3).Equals(bigxor)) } roaring-0.4.21/go.mod 0000664 0000000 0000000 00000001126 13542657257 0014365 0 ustar 00root root 0000000 0000000 module github.com/RoaringBitmap/roaring go 1.12 require ( github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect github.com/golang/snappy v0.0.1 // indirect github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 // indirect github.com/jtolds/gls v4.20.0+incompatible // indirect github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae github.com/philhofer/fwd v1.0.0 // indirect github.com/stretchr/testify v1.4.0 github.com/tinylib/msgp v1.1.0 github.com/willf/bitset v1.1.10 ) roaring-0.4.21/go.sum 0000664 0000000 0000000 00000005425 13542657257 0014420 0 ustar 00root root 0000000 0000000 github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew 
v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4= github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8= github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw= github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY= github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg= github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ= github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod 
h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU= github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc= github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= roaring-0.4.21/manyiterator.go 0000664 0000000 0000000 00000000444 13542657257 0016326 0 ustar 00root root 0000000 0000000 package roaring type manyIterable interface { nextMany(hs uint32, buf []uint32) int } func (si *shortIterator) nextMany(hs uint32, buf []uint32) int { n := 0 l := si.loc s := si.slice for n < len(buf) && l < len(s) { buf[n] = uint32(s[l]) | hs l++ n++ } si.loc = l return n } roaring-0.4.21/optimize_test.go 0000664 0000000 0000000 00000006564 13542657257 0016520 0 ustar 00root root 0000000 0000000 package roaring_test import ( "github.com/RoaringBitmap/roaring" "testing" ) func TestSelectAfterOptimize(t *testing.T) { r := roaring.New() intArray := []uint32{438945, 438946, 438947, 438948, 438949, 438950, 438951, 438952, 438953, 438954, 438955, 438956, 438957, 438958, 438959, 438960, 438961, 438962, 438963, 438964, 438965, 438966, 438967, 438968, 438969, 438970, 438971, 438972, 438973, 438974, 438975, 438976, 438977, 438978, 438979, 438980, 438981, 438982, 438983, 438984, 438985, 438986, 438987, 438988, 438989, 438990, 438991, 438992, 438993, 438994, 438995, 438996, 438997, 438998, 438999, 439000, 439001, 439002, 439003, 439004, 439005, 439006, 439007, 439008, 439009, 439010, 439011, 439012, 439013, 439014, 439015, 
439016, 439017, 439018, 439019, 439020, 439021, 439022, 439023, 439024, 439025, 439026, 439027, 439028, 439029, 439030, 439031, 439032, 439033, 439034, 439035, 439036, 439037, 439038, 439039, 439040, 439041, 439042, 439043, 439044, 439045, 439046, 439047, 439048, 439049, 439050, 439051, 439052, 439053, 439054, 439055, 439056, 439057, 439058, 439059, 439060, 439061, 439062, 439063, 439064, 439065, 439066, 439067, 439068, 439069, 439070, 439071, 439072, 439073, 439074, 439075, 439076, 439077, 439078, 439079, 439080, 439081, 439082, 439083, 439084, 439085, 439086, 439087, 439088, 439089, 439090, 439091, 439092, 439093, 439094, 439095, 439096, 439097, 439098, 439099, 439100, 439101, 439102, 439103, 439104, 439105, 439106, 439107, 439108, 439109, 439110, 439111, 439112, 439113, 439114, 439115, 439116, 439117, 439118, 439119, 439120, 439121, 439122, 439123, 439124, 439125, 439126, 439127, 439128, 439129, 439130, 439131, 439132, 439133, 439134, 439135, 439136, 439137, 439138, 439139, 439140, 439141, 439142, 439143, 439144, 439145, 439146, 439147, 439148, 439149, 439150, 439151, 439152, 439153, 439154, 439155, 439156, 439157, 439158, 439159, 439160, 439161, 439162, 439163, 439164, 439165, 439166, 439167, 439168, 439169, 439170, 439171, 439172, 439173, 439174, 439175, 439176, 439177, 439178, 439179, 439180, 439181, 439182, 439183, 439184, 439185, 439186, 439187, 439188, 439189, 439190, 439191, 439192, 439193, 439194, 439195, 439196, 439197, 439198, 439199, 439200, 439201, 439202, 439203, 439204, 439205, 439206, 439207, 439208, 439209, 439210, 439211, 439212, 439213, 439214, 439215, 439216, 439217, 439218, 439219, 439220, 439221, 439222, 439223, 439224, 439225, 439226, 439227, 439228, 439229, 439230, 439231, 439232, 439233, 439234, 439235, 439236, 439237, 439238, 439239, 439240, 439241, 439242, 439243, 439244, 439245, 439246, 439248} for _, value := range intArray { r.Add(value) } // save original version as array origArray := r.ToArray() // comment this out to get a passing 
test r.RunOptimize() // get a list of values after optimize optimized := r.ToArray() // this should be fine in both cases if diff := len(optimized) - len(origArray); diff != 0 { t.Fatal("element count different - diff:", diff) } // this is also fine for i := range optimized { if optimized[i] != origArray[i] { t.Errorf("array compare %d", i) } } // this produces errors with the optimized version of the bitmap n := r.GetCardinality() for i := uint64(0); i < n; i++ { v, err := r.Select(uint32(i)) if err != nil { t.Fatal(err) } if diff := origArray[i] - v; diff != 0 { t.Errorf("select %03d failed - %d vs %d (diff:%d)", i, origArray[i], v, diff) } } } roaring-0.4.21/parallel.go 0000664 0000000 0000000 00000032347 13542657257 0015413 0 ustar 00root root 0000000 0000000 package roaring import ( "container/heap" "fmt" "runtime" "sync" ) var defaultWorkerCount = runtime.NumCPU() type bitmapContainerKey struct { key uint16 idx int bitmap *Bitmap } type multipleContainers struct { key uint16 containers []container idx int } type keyedContainer struct { key uint16 container container idx int } type bitmapContainerHeap []bitmapContainerKey func (h bitmapContainerHeap) Len() int { return len(h) } func (h bitmapContainerHeap) Less(i, j int) bool { return h[i].key < h[j].key } func (h bitmapContainerHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } func (h *bitmapContainerHeap) Push(x interface{}) { // Push and Pop use pointer receivers because they modify the slice's length, // not just its contents. 
*h = append(*h, x.(bitmapContainerKey)) } func (h *bitmapContainerHeap) Pop() interface{} { old := *h n := len(old) x := old[n-1] *h = old[0 : n-1] return x } func (h bitmapContainerHeap) Peek() bitmapContainerKey { return h[0] } func (h *bitmapContainerHeap) popIncrementing() (key uint16, container container) { k := h.Peek() key = k.key container = k.bitmap.highlowcontainer.containers[k.idx] newIdx := k.idx + 1 if newIdx < k.bitmap.highlowcontainer.size() { k = bitmapContainerKey{ k.bitmap.highlowcontainer.keys[newIdx], newIdx, k.bitmap, } (*h)[0] = k heap.Fix(h, 0) } else { heap.Pop(h) } return } func (h *bitmapContainerHeap) Next(containers []container) multipleContainers { if h.Len() == 0 { return multipleContainers{} } key, container := h.popIncrementing() containers = append(containers, container) for h.Len() > 0 && key == h.Peek().key { _, container = h.popIncrementing() containers = append(containers, container) } return multipleContainers{ key, containers, -1, } } func newBitmapContainerHeap(bitmaps ...*Bitmap) bitmapContainerHeap { // Initialize heap var h bitmapContainerHeap = make([]bitmapContainerKey, 0, len(bitmaps)) for _, bitmap := range bitmaps { if !bitmap.IsEmpty() { key := bitmapContainerKey{ bitmap.highlowcontainer.keys[0], 0, bitmap, } h = append(h, key) } } heap.Init(&h) return h } func repairAfterLazy(c container) container { switch t := c.(type) { case *bitmapContainer: if t.cardinality == invalidCardinality { t.computeCardinality() } if t.getCardinality() <= arrayDefaultMaxSize { return t.toArrayContainer() } else if c.(*bitmapContainer).isFull() { return newRunContainer16Range(0, MaxUint16) } } return c } func toBitmapContainer(c container) container { switch t := c.(type) { case *arrayContainer: return t.toBitmapContainer() case *runContainer16: if !t.isFull() { return t.toBitmapContainer() } } return c } func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) { expectedKeys := -1 
appendedKeys := 0 var keys []uint16 var containers []container for appendedKeys != expectedKeys { select { case item := <-resultChan: if len(keys) <= item.idx { keys = append(keys, make([]uint16, item.idx-len(keys)+1)...) containers = append(containers, make([]container, item.idx-len(containers)+1)...) } keys[item.idx] = item.key containers[item.idx] = item.container appendedKeys++ case msg := <-expectedKeysChan: expectedKeys = msg } } answer := &Bitmap{ roaringArray{ make([]uint16, 0, expectedKeys), make([]container, 0, expectedKeys), make([]bool, 0, expectedKeys), false, nil, }, } for i := range keys { if containers[i] != nil { // in case a resulting container was empty, see ParAnd function answer.highlowcontainer.appendContainer(keys[i], containers[i], false) } } bitmapChan <- answer } // ParHeapOr computes the union (OR) of all provided bitmaps in parallel, // where the parameter "parallelism" determines how many workers are to be used // (if it is set to 0, a default number of workers is chosen) // ParHeapOr uses a heap to compute the union. For rare cases it might be faster than ParOr func ParHeapOr(parallelism int, bitmaps ...*Bitmap) *Bitmap { bitmapCount := len(bitmaps) if bitmapCount == 0 { return NewBitmap() } else if bitmapCount == 1 { return bitmaps[0].Clone() } if parallelism == 0 { parallelism = defaultWorkerCount } h := newBitmapContainerHeap(bitmaps...) 
bitmapChan := make(chan *Bitmap) inputChan := make(chan multipleContainers, 128) resultChan := make(chan keyedContainer, 32) expectedKeysChan := make(chan int) pool := sync.Pool{ New: func() interface{} { return make([]container, 0, len(bitmaps)) }, } orFunc := func() { // Assumes only structs with >=2 containers are passed for input := range inputChan { c := toBitmapContainer(input.containers[0]).lazyOR(input.containers[1]) for _, next := range input.containers[2:] { c = c.lazyIOR(next) } c = repairAfterLazy(c) kx := keyedContainer{ input.key, c, input.idx, } resultChan <- kx pool.Put(input.containers[:0]) } } go appenderRoutine(bitmapChan, resultChan, expectedKeysChan) for i := 0; i < parallelism; i++ { go orFunc() } idx := 0 for h.Len() > 0 { ck := h.Next(pool.Get().([]container)) if len(ck.containers) == 1 { resultChan <- keyedContainer{ ck.key, ck.containers[0], idx, } pool.Put(ck.containers[:0]) } else { ck.idx = idx inputChan <- ck } idx++ } expectedKeysChan <- idx bitmap := <-bitmapChan close(inputChan) close(resultChan) close(expectedKeysChan) return bitmap } // ParAnd computes the intersection (AND) of all provided bitmaps in parallel, // where the parameter "parallelism" determines how many workers are to be used // (if it is set to 0, a default number of workers is chosen) func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap { bitmapCount := len(bitmaps) if bitmapCount == 0 { return NewBitmap() } else if bitmapCount == 1 { return bitmaps[0].Clone() } if parallelism == 0 { parallelism = defaultWorkerCount } h := newBitmapContainerHeap(bitmaps...) 
bitmapChan := make(chan *Bitmap) inputChan := make(chan multipleContainers, 128) resultChan := make(chan keyedContainer, 32) expectedKeysChan := make(chan int) andFunc := func() { // Assumes only structs with >=2 containers are passed for input := range inputChan { c := input.containers[0].and(input.containers[1]) for _, next := range input.containers[2:] { if c.getCardinality() == 0 { break } c = c.iand(next) } // Send a nil explicitly if the result of the intersection is an empty container if c.getCardinality() == 0 { c = nil } kx := keyedContainer{ input.key, c, input.idx, } resultChan <- kx } } go appenderRoutine(bitmapChan, resultChan, expectedKeysChan) for i := 0; i < parallelism; i++ { go andFunc() } idx := 0 for h.Len() > 0 { ck := h.Next(make([]container, 0, 4)) if len(ck.containers) == bitmapCount { ck.idx = idx inputChan <- ck idx++ } } expectedKeysChan <- idx bitmap := <-bitmapChan close(inputChan) close(resultChan) close(expectedKeysChan) return bitmap } // ParOr computes the union (OR) of all provided bitmaps in parallel, // where the parameter "parallelism" determines how many workers are to be used // (if it is set to 0, a default number of workers is chosen) func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap { var lKey uint16 = MaxUint16 var hKey uint16 bitmapsFiltered := bitmaps[:0] for _, b := range bitmaps { if !b.IsEmpty() { bitmapsFiltered = append(bitmapsFiltered, b) } } bitmaps = bitmapsFiltered for _, b := range bitmaps { lKey = minOfUint16(lKey, b.highlowcontainer.keys[0]) hKey = maxOfUint16(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1]) } if lKey == MaxUint16 && hKey == 0 { return New() } else if len(bitmaps) == 1 { return bitmaps[0] } keyRange := hKey - lKey + 1 if keyRange == 1 { // revert to FastOr. Since the key range is 0 // no container-level aggregation parallelism is achievable return FastOr(bitmaps...) 
} if parallelism == 0 { parallelism = defaultWorkerCount } var chunkSize int var chunkCount int if parallelism*4 > int(keyRange) { chunkSize = 1 chunkCount = int(keyRange) } else { chunkCount = parallelism * 4 chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount } if chunkCount*chunkSize < int(keyRange) { // it's fine to panic to indicate an implementation error panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange)) } chunks := make([]*roaringArray, chunkCount) chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount))) chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount))) orFunc := func() { for spec := range chunkSpecChan { ra := lazyOrOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end) for _, b := range bitmaps[2:] { ra = lazyIOrOnRange(ra, &b.highlowcontainer, spec.start, spec.end) } for i, c := range ra.containers { ra.containers[i] = repairAfterLazy(c) } chunkChan <- parChunk{ra, spec.idx} } } for i := 0; i < parallelism; i++ { go orFunc() } go func() { for i := 0; i < chunkCount; i++ { spec := parChunkSpec{ start: uint16(int(lKey) + i*chunkSize), end: uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))), idx: int(i), } chunkSpecChan <- spec } }() chunksRemaining := chunkCount for chunk := range chunkChan { chunks[chunk.idx] = chunk.ra chunksRemaining-- if chunksRemaining == 0 { break } } close(chunkChan) close(chunkSpecChan) containerCount := 0 for _, chunk := range chunks { containerCount += chunk.size() } result := Bitmap{ roaringArray{ containers: make([]container, containerCount), keys: make([]uint16, containerCount), needCopyOnWrite: make([]bool, containerCount), }, } resultOffset := 0 for _, chunk := range chunks { copy(result.highlowcontainer.containers[resultOffset:], chunk.containers) copy(result.highlowcontainer.keys[resultOffset:], chunk.keys) 
copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite) resultOffset += chunk.size() } return &result } type parChunkSpec struct { start uint16 end uint16 idx int } type parChunk struct { ra *roaringArray idx int } func (c parChunk) size() int { return c.ra.size() } func parNaiveStartAt(ra *roaringArray, start uint16, last uint16) int { for idx, key := range ra.keys { if key >= start && key <= last { return idx } else if key > last { break } } return ra.size() } func lazyOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray { answer := newRoaringArray() length1 := ra1.size() length2 := ra2.size() idx1 := parNaiveStartAt(ra1, start, last) idx2 := parNaiveStartAt(ra2, start, last) var key1 uint16 var key2 uint16 if idx1 < length1 && idx2 < length2 { key1 = ra1.getKeyAtIndex(idx1) key2 = ra2.getKeyAtIndex(idx2) for key1 <= last && key2 <= last { if key1 < key2 { answer.appendCopy(*ra1, idx1) idx1++ if idx1 == length1 { break } key1 = ra1.getKeyAtIndex(idx1) } else if key1 > key2 { answer.appendCopy(*ra2, idx2) idx2++ if idx2 == length2 { break } key2 = ra2.getKeyAtIndex(idx2) } else { c1 := ra1.getFastContainerAtIndex(idx1, false) answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false) idx1++ idx2++ if idx1 == length1 || idx2 == length2 { break } key1 = ra1.getKeyAtIndex(idx1) key2 = ra2.getKeyAtIndex(idx2) } } } if idx2 < length2 { key2 = ra2.getKeyAtIndex(idx2) for key2 <= last { answer.appendCopy(*ra2, idx2) idx2++ if idx2 == length2 { break } key2 = ra2.getKeyAtIndex(idx2) } } if idx1 < length1 { key1 = ra1.getKeyAtIndex(idx1) for key1 <= last { answer.appendCopy(*ra1, idx1) idx1++ if idx1 == length1 { break } key1 = ra1.getKeyAtIndex(idx1) } } return answer } func lazyIOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray { length1 := ra1.size() length2 := ra2.size() idx1 := 0 idx2 := parNaiveStartAt(ra2, start, last) var key1 uint16 var key2 uint16 if idx1 < length1 && idx2 < length2 { 
key1 = ra1.getKeyAtIndex(idx1) key2 = ra2.getKeyAtIndex(idx2) for key1 <= last && key2 <= last { if key1 < key2 { idx1++ if idx1 >= length1 { break } key1 = ra1.getKeyAtIndex(idx1) } else if key1 > key2 { ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2)) ra1.needCopyOnWrite[idx1] = true idx2++ idx1++ length1++ if idx2 >= length2 { break } key2 = ra2.getKeyAtIndex(idx2) } else { c1 := ra1.getFastContainerAtIndex(idx1, true) ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2)) ra1.needCopyOnWrite[idx1] = false idx1++ idx2++ if idx1 >= length1 || idx2 >= length2 { break } key1 = ra1.getKeyAtIndex(idx1) key2 = ra2.getKeyAtIndex(idx2) } } } if idx2 < length2 { key2 = ra2.getKeyAtIndex(idx2) for key2 <= last { ra1.appendCopy(*ra2, idx2) idx2++ if idx2 >= length2 { break } key2 = ra2.getKeyAtIndex(idx2) } } return ra1 } roaring-0.4.21/parallel_benchmark_test.go 0000664 0000000 0000000 00000002012 13542657257 0020446 0 ustar 00root root 0000000 0000000 package roaring import ( "math/rand" "testing" ) func BenchmarkIntersectionLargeParallel(b *testing.B) { b.StopTimer() initsize := 650000 r := rand.New(rand.NewSource(0)) s1 := NewBitmap() sz := 150 * 1000 * 1000 for i := 0; i < initsize; i++ { s1.Add(uint32(r.Int31n(int32(sz)))) } s2 := NewBitmap() sz = 100 * 1000 * 1000 for i := 0; i < initsize; i++ { s2.Add(uint32(r.Int31n(int32(sz)))) } b.StartTimer() card := uint64(0) for j := 0; j < b.N; j++ { s3 := ParAnd(0, s1, s2) card = card + s3.GetCardinality() } } func BenchmarkIntersectionLargeRoaring(b *testing.B) { b.StopTimer() initsize := 650000 r := rand.New(rand.NewSource(0)) s1 := NewBitmap() sz := 150 * 1000 * 1000 for i := 0; i < initsize; i++ { s1.Add(uint32(r.Int31n(int32(sz)))) } s2 := NewBitmap() sz = 100 * 1000 * 1000 for i := 0; i < initsize; i++ { s2.Add(uint32(r.Int31n(int32(sz)))) } b.StartTimer() card := uint64(0) for j := 0; j < b.N; j++ { s3 := And(s1, s2) card = card + s3.GetCardinality() } } roaring-0.4.21/popcnt.go 0000664 
0000000 0000000 00000000342 13542657257 0015110 0 ustar 00root root 0000000 0000000 // +build go1.9 // "go1.9", from Go version 1.9 onward // See https://golang.org/pkg/go/build/#hdr-Build_Constraints package roaring import "math/bits" func popcount(x uint64) uint64 { return uint64(bits.OnesCount64(x)) } roaring-0.4.21/popcnt_amd64.s 0000664 0000000 0000000 00000003312 13542657257 0015740 0 ustar 00root root 0000000 0000000 // +build amd64,!appengine,!go1.9 TEXT ·hasAsm(SB),4,$0-1 MOVQ $1, AX CPUID SHRQ $23, CX ANDQ $1, CX MOVB CX, ret+0(FP) RET #define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2 TEXT ·popcntSliceAsm(SB),4,$0-32 XORQ AX, AX MOVQ s+0(FP), SI MOVQ s_len+8(FP), CX TESTQ CX, CX JZ popcntSliceEnd popcntSliceLoop: BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX ADDQ DX, AX ADDQ $8, SI LOOP popcntSliceLoop popcntSliceEnd: MOVQ AX, ret+24(FP) RET TEXT ·popcntMaskSliceAsm(SB),4,$0-56 XORQ AX, AX MOVQ s+0(FP), SI MOVQ s_len+8(FP), CX TESTQ CX, CX JZ popcntMaskSliceEnd MOVQ m+24(FP), DI popcntMaskSliceLoop: MOVQ (DI), DX NOTQ DX ANDQ (SI), DX POPCNTQ_DX_DX ADDQ DX, AX ADDQ $8, SI ADDQ $8, DI LOOP popcntMaskSliceLoop popcntMaskSliceEnd: MOVQ AX, ret+48(FP) RET TEXT ·popcntAndSliceAsm(SB),4,$0-56 XORQ AX, AX MOVQ s+0(FP), SI MOVQ s_len+8(FP), CX TESTQ CX, CX JZ popcntAndSliceEnd MOVQ m+24(FP), DI popcntAndSliceLoop: MOVQ (DI), DX ANDQ (SI), DX POPCNTQ_DX_DX ADDQ DX, AX ADDQ $8, SI ADDQ $8, DI LOOP popcntAndSliceLoop popcntAndSliceEnd: MOVQ AX, ret+48(FP) RET TEXT ·popcntOrSliceAsm(SB),4,$0-56 XORQ AX, AX MOVQ s+0(FP), SI MOVQ s_len+8(FP), CX TESTQ CX, CX JZ popcntOrSliceEnd MOVQ m+24(FP), DI popcntOrSliceLoop: MOVQ (DI), DX ORQ (SI), DX POPCNTQ_DX_DX ADDQ DX, AX ADDQ $8, SI ADDQ $8, DI LOOP popcntOrSliceLoop popcntOrSliceEnd: MOVQ AX, ret+48(FP) RET TEXT ·popcntXorSliceAsm(SB),4,$0-56 XORQ AX, AX MOVQ s+0(FP), SI MOVQ s_len+8(FP), CX TESTQ CX, CX JZ popcntXorSliceEnd MOVQ m+24(FP), DI 
popcntXorSliceLoop: MOVQ (DI), DX XORQ (SI), DX POPCNTQ_DX_DX ADDQ DX, AX ADDQ $8, SI ADDQ $8, DI LOOP popcntXorSliceLoop popcntXorSliceEnd: MOVQ AX, ret+48(FP) RET roaring-0.4.21/popcnt_asm.go 0000664 0000000 0000000 00000002226 13542657257 0015753 0 ustar 00root root 0000000 0000000 // +build amd64,!appengine,!go1.9 package roaring // *** the following functions are defined in popcnt_amd64.s //go:noescape func hasAsm() bool // useAsm is a flag used to select the GO or ASM implementation of the popcnt function var useAsm = hasAsm() //go:noescape func popcntSliceAsm(s []uint64) uint64 //go:noescape func popcntMaskSliceAsm(s, m []uint64) uint64 //go:noescape func popcntAndSliceAsm(s, m []uint64) uint64 //go:noescape func popcntOrSliceAsm(s, m []uint64) uint64 //go:noescape func popcntXorSliceAsm(s, m []uint64) uint64 func popcntSlice(s []uint64) uint64 { if useAsm { return popcntSliceAsm(s) } return popcntSliceGo(s) } func popcntMaskSlice(s, m []uint64) uint64 { if useAsm { return popcntMaskSliceAsm(s, m) } return popcntMaskSliceGo(s, m) } func popcntAndSlice(s, m []uint64) uint64 { if useAsm { return popcntAndSliceAsm(s, m) } return popcntAndSliceGo(s, m) } func popcntOrSlice(s, m []uint64) uint64 { if useAsm { return popcntOrSliceAsm(s, m) } return popcntOrSliceGo(s, m) } func popcntXorSlice(s, m []uint64) uint64 { if useAsm { return popcntXorSliceAsm(s, m) } return popcntXorSliceGo(s, m) } roaring-0.4.21/popcnt_bench_test.go 0000664 0000000 0000000 00000000312 13542657257 0017303 0 ustar 00root root 0000000 0000000 package roaring import "testing" func BenchmarkPopcount(b *testing.B) { b.StopTimer() r := getRandomUint64Set(64) b.ResetTimer() b.StartTimer() for i := 0; i < b.N; i++ { popcntSlice(r) } } roaring-0.4.21/popcnt_compat.go 0000664 0000000 0000000 00000001002 13542657257 0016445 0 ustar 00root root 0000000 0000000 // +build !go1.9 package roaring // bit population count, take from // https://code.google.com/p/go/issues/detail?id=4988#c11 // credit: 
https://code.google.com/u/arnehormann/ // credit: https://play.golang.org/p/U7SogJ7psJ // credit: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel func popcount(x uint64) uint64 { x -= (x >> 1) & 0x5555555555555555 x = (x>>2)&0x3333333333333333 + x&0x3333333333333333 x += x >> 4 x &= 0x0f0f0f0f0f0f0f0f x *= 0x0101010101010101 return x >> 56 } roaring-0.4.21/popcnt_generic.go 0000664 0000000 0000000 00000000654 13542657257 0016612 0 ustar 00root root 0000000 0000000 // +build !amd64 appengine go1.9 package roaring func popcntSlice(s []uint64) uint64 { return popcntSliceGo(s) } func popcntMaskSlice(s, m []uint64) uint64 { return popcntMaskSliceGo(s, m) } func popcntAndSlice(s, m []uint64) uint64 { return popcntAndSliceGo(s, m) } func popcntOrSlice(s, m []uint64) uint64 { return popcntOrSliceGo(s, m) } func popcntXorSlice(s, m []uint64) uint64 { return popcntXorSliceGo(s, m) } roaring-0.4.21/popcnt_slices.go 0000664 0000000 0000000 00000001235 13542657257 0016454 0 ustar 00root root 0000000 0000000 package roaring func popcntSliceGo(s []uint64) uint64 { cnt := uint64(0) for _, x := range s { cnt += popcount(x) } return cnt } func popcntMaskSliceGo(s, m []uint64) uint64 { cnt := uint64(0) for i := range s { cnt += popcount(s[i] &^ m[i]) } return cnt } func popcntAndSliceGo(s, m []uint64) uint64 { cnt := uint64(0) for i := range s { cnt += popcount(s[i] & m[i]) } return cnt } func popcntOrSliceGo(s, m []uint64) uint64 { cnt := uint64(0) for i := range s { cnt += popcount(s[i] | m[i]) } return cnt } func popcntXorSliceGo(s, m []uint64) uint64 { cnt := uint64(0) for i := range s { cnt += popcount(s[i] ^ m[i]) } return cnt } roaring-0.4.21/popcnt_slices_test.go 0000664 0000000 0000000 00000003122 13542657257 0017510 0 ustar 00root root 0000000 0000000 // +build amd64,!appengine,!go1.9 // This file tests the popcnt functions package roaring import ( "testing" "github.com/stretchr/testify/assert" ) func TestPopcntSlice(t *testing.T) { s := []uint64{2, 
3, 5, 7, 11, 13, 17, 19, 23, 29} resGo := popcntSliceGo(s) resAsm := popcntSliceAsm(s) res := popcntSlice(s) assert.Equal(t, resGo, resAsm) assert.Equal(t, resGo, res) } func TestPopcntMaskSlice(t *testing.T) { s := []uint64{2, 3, 5, 7, 11, 13, 17, 19, 23, 29} m := []uint64{31, 37, 41, 43, 47, 53, 59, 61, 67, 71} resGo := popcntMaskSliceGo(s, m) resAsm := popcntMaskSliceAsm(s, m) res := popcntMaskSlice(s, m) assert.Equal(t, resGo, resAsm) assert.Equal(t, resGo, res) } func TestPopcntAndSlice(t *testing.T) { s := []uint64{2, 3, 5, 7, 11, 13, 17, 19, 23, 29} m := []uint64{31, 37, 41, 43, 47, 53, 59, 61, 67, 71} resGo := popcntAndSliceGo(s, m) resAsm := popcntAndSliceAsm(s, m) res := popcntAndSlice(s, m) assert.Equal(t, resGo, resAsm) assert.Equal(t, resGo, res) } func TestPopcntOrSlice(t *testing.T) { s := []uint64{2, 3, 5, 7, 11, 13, 17, 19, 23, 29} m := []uint64{31, 37, 41, 43, 47, 53, 59, 61, 67, 71} resGo := popcntOrSliceGo(s, m) resAsm := popcntOrSliceAsm(s, m) res := popcntOrSlice(s, m) assert.Equal(t, resGo, resAsm) assert.Equal(t, resGo, res) } func TestPopcntXorSlice(t *testing.T) { s := []uint64{2, 3, 5, 7, 11, 13, 17, 19, 23, 29} m := []uint64{31, 37, 41, 43, 47, 53, 59, 61, 67, 71} resGo := popcntXorSliceGo(s, m) resAsm := popcntXorSliceAsm(s, m) res := popcntXorSlice(s, m) assert.Equal(t, resGo, resAsm) assert.Equal(t, resGo, res) } roaring-0.4.21/priorityqueue.go 0000664 0000000 0000000 00000004221 13542657257 0016533 0 ustar 00root root 0000000 0000000 package roaring import "container/heap" ///////////// // The priorityQueue is used to keep Bitmaps sorted. 
//////////// type item struct { value *Bitmap index int } type priorityQueue []*item func (pq priorityQueue) Len() int { return len(pq) } func (pq priorityQueue) Less(i, j int) bool { return pq[i].value.GetSizeInBytes() < pq[j].value.GetSizeInBytes() } func (pq priorityQueue) Swap(i, j int) { pq[i], pq[j] = pq[j], pq[i] pq[i].index = i pq[j].index = j } func (pq *priorityQueue) Push(x interface{}) { n := len(*pq) item := x.(*item) item.index = n *pq = append(*pq, item) } func (pq *priorityQueue) Pop() interface{} { old := *pq n := len(old) item := old[n-1] item.index = -1 // for safety *pq = old[0 : n-1] return item } func (pq *priorityQueue) update(item *item, value *Bitmap) { item.value = value heap.Fix(pq, item.index) } ///////////// // The containerPriorityQueue is used to keep the containers of various Bitmaps sorted. //////////// type containeritem struct { value *Bitmap keyindex int index int } type containerPriorityQueue []*containeritem func (pq containerPriorityQueue) Len() int { return len(pq) } func (pq containerPriorityQueue) Less(i, j int) bool { k1 := pq[i].value.highlowcontainer.getKeyAtIndex(pq[i].keyindex) k2 := pq[j].value.highlowcontainer.getKeyAtIndex(pq[j].keyindex) if k1 != k2 { return k1 < k2 } c1 := pq[i].value.highlowcontainer.getContainerAtIndex(pq[i].keyindex) c2 := pq[j].value.highlowcontainer.getContainerAtIndex(pq[j].keyindex) return c1.getCardinality() > c2.getCardinality() } func (pq containerPriorityQueue) Swap(i, j int) { pq[i], pq[j] = pq[j], pq[i] pq[i].index = i pq[j].index = j } func (pq *containerPriorityQueue) Push(x interface{}) { n := len(*pq) item := x.(*containeritem) item.index = n *pq = append(*pq, item) } func (pq *containerPriorityQueue) Pop() interface{} { old := *pq n := len(old) item := old[n-1] item.index = -1 // for safety *pq = old[0 : n-1] return item } //func (pq *containerPriorityQueue) update(item *containeritem, value *Bitmap, keyindex int) { // item.value = value // item.keyindex = keyindex // 
heap.Fix(pq, item.index) //} roaring-0.4.21/real_data_benchmark_test.go 0000664 0000000 0000000 00000011144 13542657257 0020574 0 ustar 00root root 0000000 0000000 package roaring import ( "archive/zip" "bytes" "fmt" "io" "os" "path" "strconv" "strings" "testing" ) // To run these benchmarks, type BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run - var benchRealData = false var realDatasets = []string{ "census-income_srt", "census-income", "census1881_srt", "census1881", "dimension_003", "dimension_008", "dimension_033", "uscensus2000", "weather_sept_85_srt", "weather_sept_85", "wikileaks-noquotes_srt", "wikileaks-noquotes", } func init() { if envStr, ok := os.LookupEnv("BENCH_REAL_DATA"); ok { v, err := strconv.ParseBool(envStr) if err != nil { v = false } benchRealData = v } } func retrieveRealDataBitmaps(datasetName string, optimize bool) ([]*Bitmap, error) { gopath, ok := os.LookupEnv("GOPATH") if !ok { return nil, fmt.Errorf("GOPATH not set. It's required to locate real-roaring-datasets. Set GOPATH or disable BENCH_REAL_DATA") } basePath := path.Join(gopath, "src", "github.com", "RoaringBitmap", "real-roaring-datasets") if _, err := os.Stat(basePath); os.IsNotExist(err) { return nil, fmt.Errorf("real-roaring-datasets does not exist. 
Run `go get github.com/RoaringBitmap/real-roaring-datasets`") } datasetPath := path.Join(basePath, datasetName+".zip") if _, err := os.Stat(datasetPath); os.IsNotExist(err) { return nil, fmt.Errorf("dataset %s does not exist, tried path: %s", datasetName, datasetPath) } zipFile, err := zip.OpenReader(datasetPath) if err != nil { return nil, fmt.Errorf("error opening dataset %s zipfile, cause: %v", datasetPath, err) } defer zipFile.Close() var largestFileSize uint64 for _, f := range zipFile.File { if f.UncompressedSize64 > largestFileSize { largestFileSize = f.UncompressedSize64 } } bitmaps := make([]*Bitmap, len(zipFile.File)) buf := make([]byte, largestFileSize) var bufStep uint64 = 32768 // apparently the largest buffer zip can read for i, f := range zipFile.File { r, err := f.Open() if err != nil { return nil, fmt.Errorf("failed to read bitmap file %s from dataset %s, cause: %v", f.Name, datasetName, err) } var totalReadBytes uint64 for { var endOffset uint64 if f.UncompressedSize64 < totalReadBytes+bufStep { endOffset = f.UncompressedSize64 } else { endOffset = totalReadBytes + bufStep } readBytes, err := r.Read(buf[totalReadBytes:endOffset]) totalReadBytes += uint64(readBytes) if err == io.EOF { r.Close() break } else if err != nil { r.Close() return nil, fmt.Errorf("could not read content of file %s from dataset %s, cause: %v", f.Name, datasetName, err) } } elemsAsBytes := bytes.Split(buf[:totalReadBytes], []byte{44}) // 44 is a comma b := NewBitmap() for _, elemBytes := range elemsAsBytes { elemStr := strings.TrimSpace(string(elemBytes)) e, err := strconv.ParseUint(elemStr, 10, 32) if err != nil { r.Close() return nil, fmt.Errorf("could not parse %s as uint32. Reading %s from %s. 
Cause: %v", elemStr, f.Name, datasetName, err) } b.Add(uint32(e)) } if optimize { b.RunOptimize() } bitmaps[i] = b } return bitmaps, nil } func benchmarkRealDataAggregate(b *testing.B, aggregator func(b []*Bitmap) uint64) { if !benchRealData { b.SkipNow() } for _, dataset := range realDatasets { b.Run(dataset, func(b *testing.B) { bitmaps, err := retrieveRealDataBitmaps(dataset, true) if err != nil { b.Fatal(err) } b.ResetTimer() for i := 0; i < b.N; i++ { aggregator(bitmaps) } }) } } func BenchmarkRealDataNext(b *testing.B) { benchmarkRealDataAggregate(b, func(bitmaps []*Bitmap) uint64 { tot := uint64(0) for _, b := range bitmaps { it := b.Iterator() for it.HasNext() { tot += uint64(it.Next()) } } return tot }) } func BenchmarkRealDataNextMany(b *testing.B) { benchmarkRealDataAggregate(b, func(bitmaps []*Bitmap) uint64 { tot := uint64(0) buf := make([]uint32, 4096) for _, b := range bitmaps { it := b.ManyIterator() for n := it.NextMany(buf); n != 0; n = it.NextMany(buf) { for _, v := range buf[:n] { tot += uint64(v) } } } return tot }) } func BenchmarkRealDataParOr(b *testing.B) { benchmarkRealDataAggregate(b, func(bitmaps []*Bitmap) uint64 { return ParOr(0, bitmaps...).GetCardinality() //return ParHeapOr(0, bitmaps...).GetCardinality() }) } func BenchmarkRealDataParHeapOr(b *testing.B) { benchmarkRealDataAggregate(b, func(bitmaps []*Bitmap) uint64 { return ParHeapOr(0, bitmaps...).GetCardinality() }) } func BenchmarkRealDataFastOr(b *testing.B) { benchmarkRealDataAggregate(b, func(bitmaps []*Bitmap) uint64 { return FastOr(bitmaps...).GetCardinality() }) } roaring-0.4.21/roaring.go 0000664 0000000 0000000 00000127637 13542657257 0015267 0 ustar 00root root 0000000 0000000 // Package roaring is an implementation of Roaring Bitmaps in Go. // They provide fast compressed bitmap data structures (also called bitset). // They are ideally suited to represent sets of integers over // relatively small ranges. // See http://roaringbitmap.org for details. 
package roaring import ( "bytes" "encoding/base64" "fmt" "io" "strconv" "sync" ) // Bitmap represents a compressed bitmap where you can add integers. type Bitmap struct { highlowcontainer roaringArray } // ToBase64 serializes a bitmap as Base64 func (rb *Bitmap) ToBase64() (string, error) { buf := new(bytes.Buffer) _, err := rb.WriteTo(buf) return base64.StdEncoding.EncodeToString(buf.Bytes()), err } // FromBase64 deserializes a bitmap from Base64 func (rb *Bitmap) FromBase64(str string) (int64, error) { data, err := base64.StdEncoding.DecodeString(str) if err != nil { return 0, err } buf := bytes.NewBuffer(data) return rb.ReadFrom(buf) } // WriteTo writes a serialized version of this bitmap to stream. // The format is compatible with other RoaringBitmap // implementations (Java, C) and is documented here: // https://github.com/RoaringBitmap/RoaringFormatSpec func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) { return rb.highlowcontainer.writeTo(stream) } // ToBytes returns an array of bytes corresponding to what is written // when calling WriteTo func (rb *Bitmap) ToBytes() ([]byte, error) { return rb.highlowcontainer.toBytes() } // Deprecated: WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized // version of this bitmap to stream. The format is not // compatible with the WriteTo() format, and is // experimental: it may produce smaller on disk // footprint and/or be faster to read, depending // on your content. Currently only the Go roaring // implementation supports this format. func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) { return 0, rb.highlowcontainer.writeToMsgpack(stream) } // ReadFrom reads a serialized version of this bitmap from stream. 
// The format is compatible with other RoaringBitmap // implementations (Java, C) and is documented here: // https://github.com/RoaringBitmap/RoaringFormatSpec func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) { stream := byteInputAdapterPool.Get().(*byteInputAdapter) stream.reset(reader) p, err = rb.highlowcontainer.readFrom(stream) byteInputAdapterPool.Put(stream) return } // FromBuffer creates a bitmap from its serialized version stored in buffer // // The format specification is available here: // https://github.com/RoaringBitmap/RoaringFormatSpec // // The provided byte array (buf) is expected to be a constant. // The function makes the best effort attempt not to copy data. // You should take care not to modify buff as it will // likely result in unexpected program behavior. // // Resulting bitmaps are effectively immutable in the following sense: // a copy-on-write marker is used so that when you modify the resulting // bitmap, copies of selected data (containers) are made. // You should *not* change the copy-on-write status of the resulting // bitmaps (SetCopyOnWrite). // // If buf becomes unavailable, then a bitmap created with // FromBuffer would be effectively broken. Furthermore, any // bitmap derived from this bitmap (e.g., via Or, And) might // also be broken. Thus, before making buf unavailable, you should // call CloneCopyOnWriteContainers on all such bitmaps. 
// func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) { stream := byteBufferPool.Get().(*byteBuffer) stream.reset(buf) p, err = rb.highlowcontainer.readFrom(stream) byteBufferPool.Put(stream) return } var ( byteBufferPool = sync.Pool{ New: func() interface{} { return &byteBuffer{} }, } byteInputAdapterPool = sync.Pool{ New: func() interface{} { return &byteInputAdapter{} }, } ) // RunOptimize attempts to further compress the runs of consecutive values found in the bitmap func (rb *Bitmap) RunOptimize() { rb.highlowcontainer.runOptimize() } // HasRunCompression returns true if the bitmap benefits from run compression func (rb *Bitmap) HasRunCompression() bool { return rb.highlowcontainer.hasRunCompression() } // Deprecated: ReadFromMsgpack reads a msgpack2/snappy-streaming serialized // version of this bitmap from stream. The format is // expected is that written by the WriteToMsgpack() // call; see additional notes there. func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) { return 0, rb.highlowcontainer.readFromMsgpack(stream) } // MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap // (same as ToBytes) func (rb *Bitmap) MarshalBinary() ([]byte, error) { return rb.ToBytes() } // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap func (rb *Bitmap) UnmarshalBinary(data []byte) error { r := bytes.NewReader(data) _, err := rb.ReadFrom(r) return err } // NewBitmap creates a new empty Bitmap (see also New) func NewBitmap() *Bitmap { return &Bitmap{} } // New creates a new empty Bitmap (same as NewBitmap) func New() *Bitmap { return &Bitmap{} } // Clear resets the Bitmap to be logically empty, but may retain // some memory allocations that may speed up future operations func (rb *Bitmap) Clear() { rb.highlowcontainer.clear() } // ToArray creates a new slice containing all of the integers stored in the Bitmap in sorted order func (rb *Bitmap) ToArray() []uint32 { array := make([]uint32, 
rb.GetCardinality()) pos := 0 pos2 := 0 for pos < rb.highlowcontainer.size() { hs := uint32(rb.highlowcontainer.getKeyAtIndex(pos)) << 16 c := rb.highlowcontainer.getContainerAtIndex(pos) pos++ c.fillLeastSignificant16bits(array, pos2, hs) pos2 += c.getCardinality() } return array } // GetSizeInBytes estimates the memory usage of the Bitmap. Note that this // might differ slightly from the amount of bytes required for persistent storage func (rb *Bitmap) GetSizeInBytes() uint64 { size := uint64(8) for _, c := range rb.highlowcontainer.containers { size += uint64(2) + uint64(c.getSizeInBytes()) } return size } // GetSerializedSizeInBytes computes the serialized size in bytes // of the Bitmap. It should correspond to the // number of bytes written when invoking WriteTo. You can expect // that this function is much cheaper computationally than WriteTo. func (rb *Bitmap) GetSerializedSizeInBytes() uint64 { return rb.highlowcontainer.serializedSizeInBytes() } // BoundSerializedSizeInBytes returns an upper bound on the serialized size in bytes // assuming that one wants to store "cardinality" integers in [0, universe_size) func BoundSerializedSizeInBytes(cardinality uint64, universeSize uint64) uint64 { contnbr := (universeSize + uint64(65535)) / uint64(65536) if contnbr > cardinality { contnbr = cardinality // we can't have more containers than we have values } headermax := 8*contnbr + 4 if 4 > (contnbr+7)/8 { headermax += 4 } else { headermax += (contnbr + 7) / 8 } valsarray := uint64(arrayContainerSizeInBytes(int(cardinality))) valsbitmap := contnbr * uint64(bitmapContainerSizeInBytes()) valsbest := valsarray if valsbest > valsbitmap { valsbest = valsbitmap } return valsbest + headermax } // IntIterable allows you to iterate over the values in a Bitmap type IntIterable interface { HasNext() bool Next() uint32 } // IntPeekable allows you to look at the next value without advancing and // advance as long as the next value is smaller than minval type IntPeekable 
interface { IntIterable // PeekNext peeks the next value without advancing the iterator PeekNext() uint32 // AdvanceIfNeeded advances as long as the next value is smaller than minval AdvanceIfNeeded(minval uint32) } type intIterator struct { pos int hs uint32 iter shortPeekable highlowcontainer *roaringArray } // HasNext returns true if there are more integers to iterate over func (ii *intIterator) HasNext() bool { return ii.pos < ii.highlowcontainer.size() } func (ii *intIterator) init() { if ii.highlowcontainer.size() > ii.pos { ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getShortIterator() ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 } } // Next returns the next integer func (ii *intIterator) Next() uint32 { x := uint32(ii.iter.next()) | ii.hs if !ii.iter.hasNext() { ii.pos = ii.pos + 1 ii.init() } return x } // PeekNext peeks the next value without advancing the iterator func (ii *intIterator) PeekNext() uint32 { return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs } // AdvanceIfNeeded advances as long as the next value is smaller than minval func (ii *intIterator) AdvanceIfNeeded(minval uint32) { to := minval >> 16 for ii.HasNext() && (ii.hs>>16) < to { ii.pos++ ii.init() } if ii.HasNext() && (ii.hs>>16) == to { ii.iter.advanceIfNeeded(lowbits(minval)) if !ii.iter.hasNext() { ii.pos++ ii.init() } } } func newIntIterator(a *Bitmap) *intIterator { p := new(intIterator) p.pos = 0 p.highlowcontainer = &a.highlowcontainer p.init() return p } type intReverseIterator struct { pos int hs uint32 iter shortIterable highlowcontainer *roaringArray } // HasNext returns true if there are more integers to iterate over func (ii *intReverseIterator) HasNext() bool { return ii.pos >= 0 } func (ii *intReverseIterator) init() { if ii.pos >= 0 { ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getReverseIterator() ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 } else { ii.iter = nil } } // Next returns the next integer func 
(ii *intReverseIterator) Next() uint32 { x := uint32(ii.iter.next()) | ii.hs if !ii.iter.hasNext() { ii.pos = ii.pos - 1 ii.init() } return x } func newIntReverseIterator(a *Bitmap) *intReverseIterator { p := new(intReverseIterator) p.highlowcontainer = &a.highlowcontainer p.pos = a.highlowcontainer.size() - 1 p.init() return p } // ManyIntIterable allows you to iterate over the values in a Bitmap type ManyIntIterable interface { // pass in a buffer to fill up with values, returns how many values were returned NextMany([]uint32) int } type manyIntIterator struct { pos int hs uint32 iter manyIterable highlowcontainer *roaringArray } func (ii *manyIntIterator) init() { if ii.highlowcontainer.size() > ii.pos { ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getManyIterator() ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 } else { ii.iter = nil } } func (ii *manyIntIterator) NextMany(buf []uint32) int { n := 0 for n < len(buf) { if ii.iter == nil { break } moreN := ii.iter.nextMany(ii.hs, buf[n:]) n += moreN if moreN == 0 { ii.pos = ii.pos + 1 ii.init() } } return n } func newManyIntIterator(a *Bitmap) *manyIntIterator { p := new(manyIntIterator) p.pos = 0 p.highlowcontainer = &a.highlowcontainer p.init() return p } // String creates a string representation of the Bitmap func (rb *Bitmap) String() string { // inspired by https://github.com/fzandona/goroar/ var buffer bytes.Buffer start := []byte("{") buffer.Write(start) i := rb.Iterator() counter := 0 if i.HasNext() { counter = counter + 1 buffer.WriteString(strconv.FormatInt(int64(i.Next()), 10)) } for i.HasNext() { buffer.WriteString(",") counter = counter + 1 // to avoid exhausting the memory if counter > 0x40000 { buffer.WriteString("...") break } buffer.WriteString(strconv.FormatInt(int64(i.Next()), 10)) } buffer.WriteString("}") return buffer.String() } // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes 
invalid if the bitmap is modified (e.g., with Add or Remove). func (rb *Bitmap) Iterator() IntPeekable { return newIntIterator(rb) } // ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). func (rb *Bitmap) ReverseIterator() IntIterable { return newIntReverseIterator(rb) } // ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). func (rb *Bitmap) ManyIterator() ManyIntIterable { return newManyIntIterator(rb) } // Clone creates a copy of the Bitmap func (rb *Bitmap) Clone() *Bitmap { ptr := new(Bitmap) ptr.highlowcontainer = *rb.highlowcontainer.clone() return ptr } // Minimum get the smallest value stored in this roaring bitmap, assumes that it is not empty func (rb *Bitmap) Minimum() uint32 { return uint32(rb.highlowcontainer.containers[0].minimum()) | (uint32(rb.highlowcontainer.keys[0]) << 16) } // Maximum get the largest value stored in this roaring bitmap, assumes that it is not empty func (rb *Bitmap) Maximum() uint32 { lastindex := len(rb.highlowcontainer.containers) - 1 return uint32(rb.highlowcontainer.containers[lastindex].maximum()) | (uint32(rb.highlowcontainer.keys[lastindex]) << 16) } // Contains returns true if the integer is contained in the bitmap func (rb *Bitmap) Contains(x uint32) bool { hb := highbits(x) c := rb.highlowcontainer.getContainer(hb) return c != nil && c.contains(lowbits(x)) } // ContainsInt returns true if the integer is contained in the bitmap (this is a convenience method, the parameter is casted to uint32 and Contains is called) func (rb *Bitmap) ContainsInt(x int) bool { return rb.Contains(uint32(x)) } // Equals returns true if the two bitmaps contain the same integers func (rb *Bitmap) Equals(o interface{}) bool { srb, ok := o.(*Bitmap) if 
ok { return srb.highlowcontainer.equals(rb.highlowcontainer) } return false } // AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) { containerOffset := highbits(offset) inOffset := lowbits(offset) if inOffset == 0 { answer = x.Clone() for pos := 0; pos < answer.highlowcontainer.size(); pos++ { key := answer.highlowcontainer.getKeyAtIndex(pos) key += containerOffset answer.highlowcontainer.keys[pos] = key } } else { answer = New() for pos := 0; pos < x.highlowcontainer.size(); pos++ { key := x.highlowcontainer.getKeyAtIndex(pos) key += containerOffset c := x.highlowcontainer.getContainerAtIndex(pos) offsetted := c.addOffset(inOffset) if offsetted[0].getCardinality() > 0 { curSize := answer.highlowcontainer.size() lastkey := uint16(0) if curSize > 0 { lastkey = answer.highlowcontainer.getKeyAtIndex(curSize - 1) } if curSize > 0 && lastkey == key { prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1) orrseult := prev.ior(offsetted[0]) answer.highlowcontainer.setContainerAtIndex(curSize-1, orrseult) } else { answer.highlowcontainer.appendContainer(key, offsetted[0], false) } } if offsetted[1].getCardinality() > 0 { answer.highlowcontainer.appendContainer(key+1, offsetted[1], false) } } } return answer } // Add the integer x to the bitmap func (rb *Bitmap) Add(x uint32) { hb := highbits(x) ra := &rb.highlowcontainer i := ra.getIndex(hb) if i >= 0 { var c container c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x)) rb.highlowcontainer.setContainerAtIndex(i, c) } else { newac := newArrayContainer() rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x))) } } // add the integer x to the bitmap, return the container and its index func (rb *Bitmap) addwithptr(x uint32) (int, container) { hb := highbits(x) ra := &rb.highlowcontainer i := ra.getIndex(hb) var c container if i >= 0 { c = 
ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x)) rb.highlowcontainer.setContainerAtIndex(i, c) return i, c } newac := newArrayContainer() c = newac.iaddReturnMinimized(lowbits(x)) rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, c) return -i - 1, c } // CheckedAdd adds the integer x to the bitmap and return true if it was added (false if the integer was already present) func (rb *Bitmap) CheckedAdd(x uint32) bool { // TODO: add unit tests for this method hb := highbits(x) i := rb.highlowcontainer.getIndex(hb) if i >= 0 { C := rb.highlowcontainer.getWritableContainerAtIndex(i) oldcard := C.getCardinality() C = C.iaddReturnMinimized(lowbits(x)) rb.highlowcontainer.setContainerAtIndex(i, C) return C.getCardinality() > oldcard } newac := newArrayContainer() rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x))) return true } // AddInt adds the integer x to the bitmap (convenience method: the parameter is casted to uint32 and we call Add) func (rb *Bitmap) AddInt(x int) { rb.Add(uint32(x)) } // Remove the integer x from the bitmap func (rb *Bitmap) Remove(x uint32) { hb := highbits(x) i := rb.highlowcontainer.getIndex(hb) if i >= 0 { c := rb.highlowcontainer.getWritableContainerAtIndex(i).iremoveReturnMinimized(lowbits(x)) rb.highlowcontainer.setContainerAtIndex(i, c) if rb.highlowcontainer.getContainerAtIndex(i).getCardinality() == 0 { rb.highlowcontainer.removeAtIndex(i) } } } // CheckedRemove removes the integer x from the bitmap and return true if the integer was effectively remove (and false if the integer was not present) func (rb *Bitmap) CheckedRemove(x uint32) bool { // TODO: add unit tests for this method hb := highbits(x) i := rb.highlowcontainer.getIndex(hb) if i >= 0 { C := rb.highlowcontainer.getWritableContainerAtIndex(i) oldcard := C.getCardinality() C = C.iremoveReturnMinimized(lowbits(x)) rb.highlowcontainer.setContainerAtIndex(i, C) if rb.highlowcontainer.getContainerAtIndex(i).getCardinality() == 0 
{ rb.highlowcontainer.removeAtIndex(i) return true } return C.getCardinality() < oldcard } return false } // IsEmpty returns true if the Bitmap is empty (it is faster than doing (GetCardinality() == 0)) func (rb *Bitmap) IsEmpty() bool { return rb.highlowcontainer.size() == 0 } // GetCardinality returns the number of integers contained in the bitmap func (rb *Bitmap) GetCardinality() uint64 { size := uint64(0) for _, c := range rb.highlowcontainer.containers { size += uint64(c.getCardinality()) } return size } // Rank returns the number of integers that are smaller or equal to x (Rank(infinity) would be GetCardinality()) func (rb *Bitmap) Rank(x uint32) uint64 { size := uint64(0) for i := 0; i < rb.highlowcontainer.size(); i++ { key := rb.highlowcontainer.getKeyAtIndex(i) if key > highbits(x) { return size } if key < highbits(x) { size += uint64(rb.highlowcontainer.getContainerAtIndex(i).getCardinality()) } else { return size + uint64(rb.highlowcontainer.getContainerAtIndex(i).rank(lowbits(x))) } } return size } // Select returns the xth integer in the bitmap func (rb *Bitmap) Select(x uint32) (uint32, error) { if rb.GetCardinality() <= uint64(x) { return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality()) } remaining := x for i := 0; i < rb.highlowcontainer.size(); i++ { c := rb.highlowcontainer.getContainerAtIndex(i) if remaining >= uint32(c.getCardinality()) { remaining -= uint32(c.getCardinality()) } else { key := rb.highlowcontainer.getKeyAtIndex(i) return uint32(key)<<16 + uint32(c.selectInt(uint16(remaining))), nil } } return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality()) } // And computes the intersection between two bitmaps and stores the result in the current bitmap func (rb *Bitmap) And(x2 *Bitmap) { pos1 := 0 pos2 := 0 intersectionsize := 0 length1 := rb.highlowcontainer.size() length2 := x2.highlowcontainer.size() main: for { if pos1 < length1 && pos2 < length2 
{ s1 := rb.highlowcontainer.getKeyAtIndex(pos1) s2 := x2.highlowcontainer.getKeyAtIndex(pos2) for { if s1 == s2 { c1 := rb.highlowcontainer.getWritableContainerAtIndex(pos1) c2 := x2.highlowcontainer.getContainerAtIndex(pos2) diff := c1.iand(c2) if diff.getCardinality() > 0 { rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, diff, false) intersectionsize++ } pos1++ pos2++ if (pos1 == length1) || (pos2 == length2) { break main } s1 = rb.highlowcontainer.getKeyAtIndex(pos1) s2 = x2.highlowcontainer.getKeyAtIndex(pos2) } else if s1 < s2 { pos1 = rb.highlowcontainer.advanceUntil(s2, pos1) if pos1 == length1 { break main } s1 = rb.highlowcontainer.getKeyAtIndex(pos1) } else { //s1 > s2 pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) if pos2 == length2 { break main } s2 = x2.highlowcontainer.getKeyAtIndex(pos2) } } } else { break } } rb.highlowcontainer.resize(intersectionsize) } // OrCardinality returns the cardinality of the union between two bitmaps, bitmaps are not modified func (rb *Bitmap) OrCardinality(x2 *Bitmap) uint64 { pos1 := 0 pos2 := 0 length1 := rb.highlowcontainer.size() length2 := x2.highlowcontainer.size() answer := uint64(0) main: for { if (pos1 < length1) && (pos2 < length2) { s1 := rb.highlowcontainer.getKeyAtIndex(pos1) s2 := x2.highlowcontainer.getKeyAtIndex(pos2) for { if s1 < s2 { answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).getCardinality()) pos1++ if pos1 == length1 { break main } s1 = rb.highlowcontainer.getKeyAtIndex(pos1) } else if s1 > s2 { answer += uint64(x2.highlowcontainer.getContainerAtIndex(pos2).getCardinality()) pos2++ if pos2 == length2 { break main } s2 = x2.highlowcontainer.getKeyAtIndex(pos2) } else { // TODO: could be faster if we did not have to materialize the container answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).or(x2.highlowcontainer.getContainerAtIndex(pos2)).getCardinality()) pos1++ pos2++ if (pos1 == length1) || (pos2 == length2) { break main } s1 = 
rb.highlowcontainer.getKeyAtIndex(pos1)
					s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
				}
			}
		} else {
			break
		}
	}
	// one of the two bitmaps is exhausted: the remaining containers of the
	// other contribute all of their values to the union cardinality
	for ; pos1 < length1; pos1++ {
		answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).getCardinality())
	}
	for ; pos2 < length2; pos2++ {
		answer += uint64(x2.highlowcontainer.getContainerAtIndex(pos2).getCardinality())
	}
	return answer
}

// AndCardinality returns the cardinality of the intersection between two bitmaps, bitmaps are not modified.
// The two sorted key arrays are walked in a merge pattern; only containers with
// matching 16-bit high keys can intersect, so mismatched keys are skipped with
// a galloping advanceUntil rather than one step at a time.
func (rb *Bitmap) AndCardinality(x2 *Bitmap) uint64 {
	pos1 := 0
	pos2 := 0
	answer := uint64(0)
	length1 := rb.highlowcontainer.size()
	length2 := x2.highlowcontainer.size()

main:
	for {
		if pos1 < length1 && pos2 < length2 {
			s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
			s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
			for {
				if s1 == s2 {
					// same key: count the container-level intersection
					c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
					c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
					answer += uint64(c1.andCardinality(c2))
					pos1++
					pos2++
					if (pos1 == length1) || (pos2 == length2) {
						break main
					}
					s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
					s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
				} else if s1 < s2 {
					pos1 = rb.highlowcontainer.advanceUntil(s2, pos1)
					if pos1 == length1 {
						break main
					}
					s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
				} else { //s1 > s2
					pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
					if pos2 == length2 {
						break main
					}
					s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
				}
			}
		} else {
			break
		}
	}
	return answer
}

// Intersects checks whether two bitmap intersects, bitmaps are not modified.
// Same merge walk as AndCardinality, but returns as soon as any pair of
// same-key containers reports a non-empty intersection.
func (rb *Bitmap) Intersects(x2 *Bitmap) bool {
	pos1 := 0
	pos2 := 0
	length1 := rb.highlowcontainer.size()
	length2 := x2.highlowcontainer.size()

main:
	for {
		if pos1 < length1 && pos2 < length2 {
			s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
			s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
			for {
				if s1 == s2 {
					c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
					c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
					if c1.intersects(c2) {
						return true // early out: one shared value is enough
					}
					pos1++
					pos2++
					if (pos1 == length1) || (pos2 == length2) {
						break main
					}
					s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
					s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
				} else if s1 < s2 {
					pos1 = rb.highlowcontainer.advanceUntil(s2, pos1)
					if pos1 == length1 {
						break main
					}
					s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
				} else { //s1 > s2
					pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
					if pos2 == length2 {
						break main
					}
					s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
				}
			}
		} else {
			break
		}
	}
	return false
}

// Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap.
// Containers only in x2 are inserted (writable copy); same-key containers are
// xor'ed, and an empty xor result removes the container from rb.
func (rb *Bitmap) Xor(x2 *Bitmap) {
	pos1 := 0
	pos2 := 0
	length1 := rb.highlowcontainer.size()
	length2 := x2.highlowcontainer.size()
	for {
		if (pos1 < length1) && (pos2 < length2) {
			s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
			s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
			if s1 < s2 {
				// keys only in rb are unchanged by xor; skip ahead
				pos1 = rb.highlowcontainer.advanceUntil(s2, pos1)
				if pos1 == length1 {
					break
				}
			} else if s1 > s2 {
				c := x2.highlowcontainer.getWritableContainerAtIndex(pos2)
				rb.highlowcontainer.insertNewKeyValueAt(pos1, x2.highlowcontainer.getKeyAtIndex(pos2), c)
				length1++ // rb grew by one container
				pos1++
				pos2++
			} else {
				// TODO: could be computed in-place for reduced memory usage
				c := rb.highlowcontainer.getContainerAtIndex(pos1).xor(x2.highlowcontainer.getContainerAtIndex(pos2))
				if c.getCardinality() > 0 {
					rb.highlowcontainer.setContainerAtIndex(pos1, c)
					pos1++
				} else {
					// identical containers cancel out entirely
					rb.highlowcontainer.removeAtIndex(pos1)
					length1--
				}
				pos2++
			}
		} else {
			break
		}
	}
	if pos1 == length1 {
		// rb exhausted: remaining x2 containers all belong to the result
		rb.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
	}
}

// Or computes the union between two bitmaps and stores the result in the current bitmap.
// Same-key containers are merged in place via ior on a writable copy; keys only
// in x2 are inserted as clones.
func (rb *Bitmap) Or(x2 *Bitmap) {
	pos1 := 0
	pos2 := 0
	length1 := rb.highlowcontainer.size()
	length2 := x2.highlowcontainer.size()

main:
	for (pos1 < length1) && (pos2 < length2) {
		s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
		s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
		for {
			if s1 < s2 {
				// container only in rb: already part of the union
				pos1++
				if pos1 == length1 {
					break main
				}
				s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
			} else if s1 > s2 {
				rb.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone())
				pos1++
				length1++
				pos2++
				if pos2 == length2 {
					break main
				}
				s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
			} else {
				rb.highlowcontainer.replaceKeyAndContainerAtIndex(pos1, s1, rb.highlowcontainer.getWritableContainerAtIndex(pos1).ior(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
				pos1++
				pos2++
				if (pos1 == length1) || (pos2 == length2) {
					break main
				}
				s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
				s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
			}
		}
	}
	if pos1 == length1 {
		rb.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
	}
}

// AndNot computes the difference between two bitmaps and stores the result in the current bitmap.
// The result is compacted in place: surviving containers are written back at
// index intersectionsize, and the array is truncated at the end.
func (rb *Bitmap) AndNot(x2 *Bitmap) {
	pos1 := 0
	pos2 := 0
	intersectionsize := 0
	length1 := rb.highlowcontainer.size()
	length2 := x2.highlowcontainer.size()

main:
	for {
		if pos1 < length1 && pos2 < length2 {
			s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
			s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
			for {
				if s1 == s2 {
					c1 := rb.highlowcontainer.getWritableContainerAtIndex(pos1)
					c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
					diff := c1.iandNot(c2)
					if diff.getCardinality() > 0 {
						rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, diff, false)
						intersectionsize++
					}
					pos1++
					pos2++
					if (pos1 == length1) || (pos2 == length2) {
						break main
					}
					s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
					s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
				} else if s1 < s2 {
					// key only in rb: kept verbatim (copy-on-write flag preserved)
					c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
					mustCopyOnWrite := rb.highlowcontainer.needsCopyOnWrite(pos1)
					rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, c1, mustCopyOnWrite)
					intersectionsize++
					pos1++
					if pos1 == length1 {
						break main
					}
					s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
				} else { //s1 > s2
					pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
					if pos2 == length2 {
						break main
					}
					s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
				}
			}
		} else {
			break
		}
	}
	// TODO:implement as a copy
	for pos1 < length1 {
		c1 := rb.highlowcontainer.getContainerAtIndex(pos1)
		s1 := rb.highlowcontainer.getKeyAtIndex(pos1)
		mustCopyOnWrite := rb.highlowcontainer.needsCopyOnWrite(pos1)
		rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, c1, mustCopyOnWrite)
		intersectionsize++
		pos1++
	}
	rb.highlowcontainer.resize(intersectionsize)
}

// Or computes the union between two bitmaps and returns the result,
// leaving both inputs unmodified.
func Or(x1, x2 *Bitmap) *Bitmap {
	answer := NewBitmap()
	pos1 := 0
	pos2 := 0
	length1 := x1.highlowcontainer.size()
	length2 := x2.highlowcontainer.size()

main:
	for (pos1 < length1) && (pos2 < length2) {
		s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
		s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
		for {
			if s1 < s2 {
				answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
				pos1++
				if pos1 == length1 {
					break main
				}
				s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
			} else if s1 > s2 {
				answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
				pos2++
				if pos2 == length2 {
					break main
				}
				s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
			} else {
				answer.highlowcontainer.appendContainer(s1, x1.highlowcontainer.getContainerAtIndex(pos1).or(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
				pos1++
				pos2++
				if (pos1 == length1) || (pos2 == length2) {
					break main
				}
				s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
				s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
			}
		}
	}
	// append the tail of whichever input is not yet exhausted
	if pos1 == length1 {
		answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
	} else if pos2 == length2 {
		answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
	}
	return answer
}

// And computes the intersection between two bitmaps and returns the result,
// leaving both inputs unmodified. Only same-key container pairs with a
// non-empty intersection are appended to the answer.
func And(x1, x2 *Bitmap) *Bitmap {
	answer := NewBitmap()
	pos1 := 0
	pos2 := 0
	length1 := x1.highlowcontainer.size()
	length2 := x2.highlowcontainer.size()

main:
	for pos1 < length1 && pos2 < length2 {
		s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
		s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
		for {
			if s1 == s2 {
				C := x1.highlowcontainer.getContainerAtIndex(pos1)
				C = C.and(x2.highlowcontainer.getContainerAtIndex(pos2))
				if C.getCardinality() > 0 {
					answer.highlowcontainer.appendContainer(s1, C, false)
				}
				pos1++
				pos2++
				if (pos1 == length1) || (pos2 == length2) {
					break main
				}
				s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
				s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
			} else if s1 < s2 {
				pos1 = x1.highlowcontainer.advanceUntil(s2, pos1)
				if pos1 == length1 {
					break main
				}
				s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
			} else { // s1 > s2
				pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
				if pos2 == length2 {
					break main
				}
				s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
			}
		}
	}
	return answer
}

// Xor computes the symmetric difference between two bitmaps and returns the
// result, leaving both inputs unmodified.
func Xor(x1, x2 *Bitmap) *Bitmap {
	answer := NewBitmap()
	pos1 := 0
	pos2 := 0
	length1 := x1.highlowcontainer.size()
	length2 := x2.highlowcontainer.size()
	for {
		if (pos1 < length1) && (pos2 < length2) {
			s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
			s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
			if s1 < s2 {
				answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
				pos1++
			} else if s1 > s2 {
				answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
				pos2++
			} else {
				c := x1.highlowcontainer.getContainerAtIndex(pos1).xor(x2.highlowcontainer.getContainerAtIndex(pos2))
				if c.getCardinality() > 0 {
					answer.highlowcontainer.appendContainer(s1, c, false)
				}
				pos1++
				pos2++
			}
		} else {
			break
		}
	}
	if pos1 == length1 {
		answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
	} else if pos2 == length2 {
		answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
	}
	return answer
}

// AndNot computes the difference between two bitmaps and returns the result,
// leaving both inputs unmodified. Only leftover containers of x1 (pos2
// exhausted) are appended at the end; leftovers of x2 are irrelevant to x1\x2.
func AndNot(x1, x2 *Bitmap) *Bitmap {
	answer := NewBitmap()
	pos1 := 0
	pos2 := 0
	length1 := x1.highlowcontainer.size()
	length2 := x2.highlowcontainer.size()

main:
	for {
		if pos1 < length1 && pos2 < length2 {
			s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
			s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
			for {
				if s1 < s2 {
					// key absent from x2: container survives unchanged
					answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
					pos1++
					if pos1 == length1 {
						break main
					}
					s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
				} else if s1 == s2 {
					c1 := x1.highlowcontainer.getContainerAtIndex(pos1)
					c2 := x2.highlowcontainer.getContainerAtIndex(pos2)
					diff := c1.andNot(c2)
					if diff.getCardinality() > 0 {
						answer.highlowcontainer.appendContainer(s1, diff, false)
					}
					pos1++
					pos2++
					if (pos1 == length1) || (pos2 == length2) {
						break main
					}
					s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
					s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
				} else { //s1 > s2
					pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
					if pos2 == length2 {
						break main
					}
					s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
				}
			}
		} else {
			break
		}
	}
	if pos2 == length2 {
		answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
	}
	return answer
}

// AddMany add all of the values in dat.
// Consecutive values sharing the same 16-bit high key reuse the container
// located for the previous value, avoiding a key search per element; this is
// fastest when dat is sorted (NOTE(review): not required for correctness).
func (rb *Bitmap) AddMany(dat []uint32) {
	if len(dat) == 0 {
		return
	}
	prev := dat[0]
	idx, c := rb.addwithptr(prev)
	for _, i := range dat[1:] {
		if highbits(prev) == highbits(i) {
			// same container as the previous value: add directly
			c = c.iaddReturnMinimized(lowbits(i))
			rb.highlowcontainer.setContainerAtIndex(idx, c)
		} else {
			idx, c = rb.addwithptr(i)
		}
		prev = i
	}
}

// BitmapOf generates a new bitmap filled with the specified integers
func BitmapOf(dat ...uint32) *Bitmap {
	ans := NewBitmap()
	ans.AddMany(dat)
	return ans
}

// Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed,
// and any integer present in the range and not in the bitmap is added.
// The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
// while uint64(0x100000000) cannot be represented as a 32-bit value.
func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { if rangeEnd > MaxUint32+1 { panic("rangeEnd > MaxUint32+1") } if rangeStart > MaxUint32+1 { panic("rangeStart > MaxUint32+1") } if rangeStart >= rangeEnd { return } hbStart := uint32(highbits(uint32(rangeStart))) lbStart := uint32(lowbits(uint32(rangeStart))) hbLast := uint32(highbits(uint32(rangeEnd - 1))) lbLast := uint32(lowbits(uint32(rangeEnd - 1))) var max uint32 = maxLowBit for hb := hbStart; hb <= hbLast; hb++ { var containerStart uint32 if hb == hbStart { containerStart = uint32(lbStart) } containerLast := max if hb == hbLast { containerLast = uint32(lbLast) } i := rb.highlowcontainer.getIndex(uint16(hb)) if i >= 0 { c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1) if c.getCardinality() > 0 { rb.highlowcontainer.setContainerAtIndex(i, c) } else { rb.highlowcontainer.removeAtIndex(i) } } else { // *think* the range of ones must never be // empty. rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) } } } // FlipInt calls Flip after casting the parameters (convenience method) func (rb *Bitmap) FlipInt(rangeStart, rangeEnd int) { rb.Flip(uint64(rangeStart), uint64(rangeEnd)) } // AddRange adds the integers in [rangeStart, rangeEnd) to the bitmap. // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range // while uint64(0x100000000) cannot be represented as a 32-bit value. 
func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) { if rangeStart >= rangeEnd { return } if rangeEnd-1 > MaxUint32 { panic("rangeEnd-1 > MaxUint32") } hbStart := uint32(highbits(uint32(rangeStart))) lbStart := uint32(lowbits(uint32(rangeStart))) hbLast := uint32(highbits(uint32(rangeEnd - 1))) lbLast := uint32(lowbits(uint32(rangeEnd - 1))) var max uint32 = maxLowBit for hb := hbStart; hb <= hbLast; hb++ { containerStart := uint32(0) if hb == hbStart { containerStart = lbStart } containerLast := max if hb == hbLast { containerLast = lbLast } i := rb.highlowcontainer.getIndex(uint16(hb)) if i >= 0 { c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1) rb.highlowcontainer.setContainerAtIndex(i, c) } else { // *think* the range of ones must never be // empty. rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) } } } // RemoveRange removes the integers in [rangeStart, rangeEnd) from the bitmap. // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range // while uint64(0x100000000) cannot be represented as a 32-bit value. 
func (rb *Bitmap) RemoveRange(rangeStart, rangeEnd uint64) { if rangeStart >= rangeEnd { return } if rangeEnd-1 > MaxUint32 { // logically, we should assume that the user wants to // remove all values from rangeStart to infinity // see https://github.com/RoaringBitmap/roaring/issues/141 rangeEnd = uint64(0x100000000) } hbStart := uint32(highbits(uint32(rangeStart))) lbStart := uint32(lowbits(uint32(rangeStart))) hbLast := uint32(highbits(uint32(rangeEnd - 1))) lbLast := uint32(lowbits(uint32(rangeEnd - 1))) var max uint32 = maxLowBit if hbStart == hbLast { i := rb.highlowcontainer.getIndex(uint16(hbStart)) if i < 0 { return } c := rb.highlowcontainer.getWritableContainerAtIndex(i).iremoveRange(int(lbStart), int(lbLast+1)) if c.getCardinality() > 0 { rb.highlowcontainer.setContainerAtIndex(i, c) } else { rb.highlowcontainer.removeAtIndex(i) } return } ifirst := rb.highlowcontainer.getIndex(uint16(hbStart)) ilast := rb.highlowcontainer.getIndex(uint16(hbLast)) if ifirst >= 0 { if lbStart != 0 { c := rb.highlowcontainer.getWritableContainerAtIndex(ifirst).iremoveRange(int(lbStart), int(max+1)) if c.getCardinality() > 0 { rb.highlowcontainer.setContainerAtIndex(ifirst, c) ifirst++ } } } else { ifirst = -ifirst - 1 } if ilast >= 0 { if lbLast != max { c := rb.highlowcontainer.getWritableContainerAtIndex(ilast).iremoveRange(int(0), int(lbLast+1)) if c.getCardinality() > 0 { rb.highlowcontainer.setContainerAtIndex(ilast, c) } else { ilast++ } } else { ilast++ } } else { ilast = -ilast - 1 } rb.highlowcontainer.removeIndexRange(ifirst, ilast) } // Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed, // and any integer present in the range and not in the bitmap is added, a new bitmap is returned leaving // the current bitmap unchanged. 
// The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range // while uint64(0x100000000) cannot be represented as a 32-bit value. func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap { if rangeStart >= rangeEnd { return bm.Clone() } if rangeStart > MaxUint32 { panic("rangeStart > MaxUint32") } if rangeEnd-1 > MaxUint32 { panic("rangeEnd-1 > MaxUint32") } answer := NewBitmap() hbStart := uint32(highbits(uint32(rangeStart))) lbStart := uint32(lowbits(uint32(rangeStart))) hbLast := uint32(highbits(uint32(rangeEnd - 1))) lbLast := uint32(lowbits(uint32(rangeEnd - 1))) // copy the containers before the active area answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, uint16(hbStart)) var max uint32 = maxLowBit for hb := hbStart; hb <= hbLast; hb++ { var containerStart uint32 if hb == hbStart { containerStart = uint32(lbStart) } containerLast := max if hb == hbLast { containerLast = uint32(lbLast) } i := bm.highlowcontainer.getIndex(uint16(hb)) j := answer.highlowcontainer.getIndex(uint16(hb)) if i >= 0 { c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1) if c.getCardinality() > 0 { answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), c) } } else { // *think* the range of ones must never be // empty. answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) } } // copy the containers after the active area. answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, uint16(hbLast)) return answer } // SetCopyOnWrite sets this bitmap to use copy-on-write so that copies are fast and memory conscious // if the parameter is true, otherwise we leave the default where hard copies are made // (copy-on-write requires extra care in a threaded context). // Calling SetCopyOnWrite(true) on a bitmap created with FromBuffer is unsafe. 
func (rb *Bitmap) SetCopyOnWrite(val bool) { rb.highlowcontainer.copyOnWrite = val } // GetCopyOnWrite gets this bitmap's copy-on-write property func (rb *Bitmap) GetCopyOnWrite() (val bool) { return rb.highlowcontainer.copyOnWrite } // CloneCopyOnWriteContainers clones all containers which have // needCopyOnWrite set to true. // This can be used to make sure it is safe to munmap a []byte // that the roaring array may still have a reference to, after // calling FromBuffer. // More generally this function is useful if you call FromBuffer // to construct a bitmap with a backing array buf // and then later discard the buf array. Note that you should call // CloneCopyOnWriteContainers on all bitmaps that were derived // from the 'FromBuffer' bitmap since they map have dependencies // on the buf array as well. func (rb *Bitmap) CloneCopyOnWriteContainers() { rb.highlowcontainer.cloneCopyOnWriteContainers() } // FlipInt calls Flip after casting the parameters (convenience method) func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap { return Flip(bm, uint64(rangeStart), uint64(rangeEnd)) } // Statistics provides details on the container types in use. type Statistics struct { Cardinality uint64 Containers uint64 ArrayContainers uint64 ArrayContainerBytes uint64 ArrayContainerValues uint64 BitmapContainers uint64 BitmapContainerBytes uint64 BitmapContainerValues uint64 RunContainers uint64 RunContainerBytes uint64 RunContainerValues uint64 } // Stats returns details on container type usage in a Statistics struct. 
func (rb *Bitmap) Stats() Statistics { stats := Statistics{} stats.Containers = uint64(len(rb.highlowcontainer.containers)) for _, c := range rb.highlowcontainer.containers { stats.Cardinality += uint64(c.getCardinality()) switch c.(type) { case *arrayContainer: stats.ArrayContainers++ stats.ArrayContainerBytes += uint64(c.getSizeInBytes()) stats.ArrayContainerValues += uint64(c.getCardinality()) case *bitmapContainer: stats.BitmapContainers++ stats.BitmapContainerBytes += uint64(c.getSizeInBytes()) stats.BitmapContainerValues += uint64(c.getCardinality()) case *runContainer16: stats.RunContainers++ stats.RunContainerBytes += uint64(c.getSizeInBytes()) stats.RunContainerValues += uint64(c.getCardinality()) } } return stats } roaring-0.4.21/roaring_test.go 0000664 0000000 0000000 00000135576 13542657257 0016327 0 ustar 00root root 0000000 0000000 package roaring import ( "bytes" "log" "math" "math/rand" "strconv" "testing" "unsafe" "github.com/stretchr/testify/assert" "github.com/willf/bitset" ) func TestReverseIteratorCount(t *testing.T) { array := []int{2, 63, 64, 65, 4095, 4096, 4097, 4159, 4160, 4161, 5000, 20000, 66666} for _, testSize := range array { b := New() for i := uint32(0); i < uint32(testSize); i++ { b.Add(i) } it := b.ReverseIterator() count := 0 for it.HasNext() { it.Next() count++ } assert.Equal(t, testSize, count) } } func TestRoaringIntervalCheck(t *testing.T) { r := BitmapOf(1, 2, 3, 1000) rangeb := New() rangeb.AddRange(10, 1000+1) assert.True(t, r.Intersects(rangeb)) rangeb2 := New() rangeb2.AddRange(10, 1000) assert.False(t, r.Intersects(rangeb2)) } func TestRoaringRangeEnd(t *testing.T) { r := New() r.Add(MaxUint32) assert.EqualValues(t, 1, r.GetCardinality()) r.RemoveRange(0, MaxUint32) assert.EqualValues(t, 1, r.GetCardinality()) r.RemoveRange(0, math.MaxUint64) assert.EqualValues(t, 0, r.GetCardinality()) r.Add(MaxUint32) assert.EqualValues(t, 1, r.GetCardinality()) r.RemoveRange(0, 0x100000001) assert.EqualValues(t, 0, 
r.GetCardinality())

	r.Add(MaxUint32)
	assert.EqualValues(t, 1, r.GetCardinality())

	r.RemoveRange(0, 0x100000000)
	assert.EqualValues(t, 0, r.GetCardinality())
}

// TestFirstLast verifies Minimum/Maximum as the bitmap grows and after RunOptimize.
func TestFirstLast(t *testing.T) {
	bm := New()
	bm.AddInt(2)
	bm.AddInt(4)
	bm.AddInt(8)

	assert.EqualValues(t, 2, bm.Minimum())
	assert.EqualValues(t, 8, bm.Maximum())

	i := 1 << 5
	for ; i < (1 << 17); i++ {
		bm.AddInt(i)

		assert.EqualValues(t, 2, bm.Minimum())
		assert.EqualValues(t, i, bm.Maximum())
	}

	bm.RunOptimize()

	assert.EqualValues(t, 2, bm.Minimum())
	assert.EqualValues(t, i-1, bm.Maximum())
}

// TestRoaringBitmapBitmapOf checks BitmapOf and serialized-size reporting.
func TestRoaringBitmapBitmapOf(t *testing.T) {
	array := []uint32{5580, 33722, 44031, 57276, 83097}
	bmp := BitmapOf(array...)

	assert.EqualValues(t, len(array), bmp.GetCardinality())

	by, _ := bmp.ToBytes()

	assert.EqualValues(t, len(by), bmp.GetSerializedSizeInBytes())
}

func TestRoaringBitmapAdd(t *testing.T) {
	array := []uint32{5580, 33722, 44031, 57276, 83097}
	bmp := New()
	for _, v := range array {
		bmp.Add(v)
	}

	assert.EqualValues(t, len(array), bmp.GetCardinality())
}

func TestRoaringBitmapAddMany(t *testing.T) {
	array := []uint32{5580, 33722, 44031, 57276, 83097}
	bmp := NewBitmap()
	bmp.AddMany(array)

	assert.EqualValues(t, len(array), bmp.GetCardinality())
}

// TestRoaringBitmapAddOffset checks that AddOffset shifts every value by the offset.
func TestRoaringBitmapAddOffset(t *testing.T) {
	array := []uint32{5580, 33722, 44031, 57276, 83097}
	bmp := NewBitmap()
	bmp.AddMany(array)
	offtest := uint32(25000)
	cop := AddOffset(bmp, offtest)

	assert.EqualValues(t, len(array), cop.GetCardinality())

	expected := make([]uint32, len(array))
	for i, x := range array {
		expected[i] = x + offtest
	}
	wout := cop.ToArray()

	assert.EqualValues(t, expected, wout)
}

// TestRoaringInPlaceAndNotBitmapContainer checks that an in-place AndNot
// result round-trips through serialization unchanged.
func TestRoaringInPlaceAndNotBitmapContainer(t *testing.T) {
	bm := NewBitmap()
	for i := 0; i < 8192; i++ {
		bm.Add(uint32(i))
	}
	toRemove := NewBitmap()
	for i := 128; i < 8192; i++ {
		toRemove.Add(uint32(i))
	}
	bm.AndNot(toRemove)

	var b bytes.Buffer
	_, err := bm.WriteTo(&b)

	assert.NoError(t, err)

	bm2 := NewBitmap()
	bm2.ReadFrom(bytes.NewBuffer(b.Bytes()))

	assert.True(t, bm2.Equals(bm))
}

// https://github.com/RoaringBitmap/roaring/issues/64
func TestFlip64(t *testing.T) {
	bm := New()
	bm.AddInt(0)
	bm.Flip(1, 2)
	i := bm.Iterator()

	assert.False(t, i.Next() != 0 || i.Next() != 1 || i.HasNext())
}

// https://github.com/RoaringBitmap/roaring/issues/64
func TestFlip64Off(t *testing.T) {
	bm := New()
	bm.AddInt(10)
	bm.Flip(11, 12)
	i := bm.Iterator()

	assert.False(t, i.Next() != 10 || i.Next() != 11 || i.HasNext())
}

// TestStringer checks String() output before and after run optimization.
func TestStringer(t *testing.T) {
	v := NewBitmap()
	for i := uint32(0); i < 10; i++ {
		v.Add(i)
	}

	assert.Equal(t, "{0,1,2,3,4,5,6,7,8,9}", v.String())

	v.RunOptimize()

	assert.Equal(t, "{0,1,2,3,4,5,6,7,8,9}", v.String())
}

// TestFastCard checks And/OrCardinality symmetry, including after RunOptimize.
func TestFastCard(t *testing.T) {
	bm := NewBitmap()
	bm.Add(1)
	bm.AddRange(21, 260000)
	bm2 := NewBitmap()
	bm2.Add(25)

	assert.EqualValues(t, 1, bm2.AndCardinality(bm))
	assert.Equal(t, bm.GetCardinality(), bm2.OrCardinality(bm))
	assert.EqualValues(t, 1, bm.AndCardinality(bm2))
	assert.Equal(t, bm.GetCardinality(), bm.OrCardinality(bm2))
	assert.EqualValues(t, 1, bm2.AndCardinality(bm))
	assert.Equal(t, bm.GetCardinality(), bm2.OrCardinality(bm))

	bm.RunOptimize()

	assert.EqualValues(t, 1, bm2.AndCardinality(bm))
	assert.Equal(t, bm.GetCardinality(), bm2.OrCardinality(bm))
	assert.EqualValues(t, 1, bm.AndCardinality(bm2))
	assert.Equal(t, bm.GetCardinality(), bm.OrCardinality(bm2))
	assert.EqualValues(t, 1, bm2.AndCardinality(bm))
	assert.Equal(t, bm.GetCardinality(), bm2.OrCardinality(bm))
}

func TestIntersects1(t *testing.T) {
	bm := NewBitmap()
	bm.Add(1)
	bm.AddRange(21, 26)
	bm2 := NewBitmap()
	bm2.Add(25)

	assert.True(t, bm2.Intersects(bm))

	bm.Remove(25)
	assert.Equal(t, false, bm2.Intersects(bm))

	bm.AddRange(1, 100000)
	assert.True(t, bm2.Intersects(bm))
}

// TestRangePanic checks that overlapping AddRange calls do not panic.
func TestRangePanic(t *testing.T) {
	bm := NewBitmap()
	bm.Add(1)
	bm.AddRange(21, 26)
	bm.AddRange(9, 14)
	bm.AddRange(11, 16)
}

func TestRangeRemoval(t *testing.T) {
	bm := NewBitmap()
	bm.Add(1)
	bm.AddRange(21, 26)
	bm.AddRange(9, 14)
	bm.RemoveRange(11, 16)
	bm.RemoveRange(1, 26)
	c := bm.GetCardinality()

	assert.EqualValues(t, 0, c)

	bm.AddRange(1, 10000)
	c = bm.GetCardinality()

	assert.EqualValues(t, 10000-1, c)

	bm.RemoveRange(1, 10000)
	c = bm.GetCardinality()

	assert.EqualValues(t, 0, c)
}

func TestRangeRemovalFromContent(t *testing.T) {
	bm := NewBitmap()
	for i := 100; i < 10000; i++ {
		bm.AddInt(i * 3)
	}
	bm.AddRange(21, 26)
	bm.AddRange(9, 14)
	bm.RemoveRange(11, 16)
	bm.RemoveRange(0, 30000)
	c := bm.GetCardinality()

	assert.EqualValues(t, 00, c)
}

// TestFlipOnEmpty checks both the in-place Flip method and the Flip function
// against an empty bitmap.
func TestFlipOnEmpty(t *testing.T) {
	t.Run("TestFlipOnEmpty in-place", func(t *testing.T) {
		bm := NewBitmap()
		bm.Flip(0, 10)
		c := bm.GetCardinality()

		assert.EqualValues(t, 10, c)
	})

	t.Run("TestFlipOnEmpty, generating new result", func(t *testing.T) {
		bm := NewBitmap()
		bm = Flip(bm, 0, 10)
		c := bm.GetCardinality()

		assert.EqualValues(t, 10, c)
	})
}

func TestBitmapRank2(t *testing.T) {
	r := NewBitmap()
	for i := uint32(1); i < 8194; i += 2 {
		r.Add(i)
	}

	rank := r.Rank(63)
	assert.EqualValues(t, 32, rank)
}

// TestBitmapRank sweeps gap/size combinations and checks Rank against the
// closed-form expectation (y+1+gap-1)/gap.
func TestBitmapRank(t *testing.T) {
	for N := uint32(1); N <= 1048576; N *= 2 {
		t.Run("rank tests"+strconv.Itoa(int(N)), func(t *testing.T) {
			for gap := uint32(1); gap <= 65536; gap *= 2 {
				rb1 := NewBitmap()
				for x := uint32(0); x <= N; x += gap {
					rb1.Add(x)
				}
				for y := uint32(0); y <= N; y++ {
					if rb1.Rank(y) != uint64((y+1+gap-1)/gap) {
						assert.Equal(t, (y+1+gap-1)/gap, rb1.Rank(y))
					}
				}
			}
		})
	}
}

// TestBitmapSelect checks Select(y) returns the y-th smallest value (y*gap).
func TestBitmapSelect(t *testing.T) {
	for N := uint32(1); N <= 1048576; N *= 2 {
		t.Run("rank tests"+strconv.Itoa(int(N)), func(t *testing.T) {
			for gap := uint32(1); gap <= 65536; gap *= 2 {
				rb1 := NewBitmap()
				for x := uint32(0); x <= N; x += gap {
					rb1.Add(x)
				}
				for y := uint32(0); y <= N/gap; y++ {
					expectedInt := y * gap
					i, err := rb1.Select(y)
					if err != nil {
						t.Fatal(err)
					}

					if i != expectedInt {
						assert.Equal(t, expectedInt, i)
					}
				}
			}
		})
	}
}

// some extra tests
// TestBitmapExtra cross-checks every set operation against the willf/bitset
// reference implementation.
func TestBitmapExtra(t *testing.T) {
	for N := uint32(1); N <= 65536; N *= 2 {
		t.Run("extra tests"+strconv.Itoa(int(N)), func(t *testing.T) {
			for gap := uint32(1); gap <= 65536; gap *= 2 {
				bs1 := bitset.New(0)
				rb1 := NewBitmap()
				for x := uint32(0); x <= N; x += gap {
					bs1.Set(uint(x))
					rb1.Add(x)
				}

				assert.EqualValues(t, rb1.GetCardinality(), bs1.Count())
				assert.True(t, equalsBitSet(bs1, rb1))

				for offset := uint32(1); offset <= gap; offset *= 2 {
					bs2 := bitset.New(0)
					rb2 := NewBitmap()
					for x := uint32(0); x <= N; x += gap {
						bs2.Set(uint(x + offset))
						rb2.Add(x + offset)
					}

					assert.EqualValues(t, rb2.GetCardinality(), bs2.Count())
					assert.True(t, equalsBitSet(bs2, rb2))

					clonebs1 := bs1.Clone()
					clonebs1.InPlaceIntersection(bs2)

					if !equalsBitSet(clonebs1, And(rb1, rb2)) {
						v := rb1.Clone()
						v.And(rb2)

						assert.True(t, equalsBitSet(clonebs1, v))
					}

					// testing OR
					clonebs1 = bs1.Clone()
					clonebs1.InPlaceUnion(bs2)

					assert.True(t, equalsBitSet(clonebs1, Or(rb1, rb2)))

					// testing XOR
					clonebs1 = bs1.Clone()
					clonebs1.InPlaceSymmetricDifference(bs2)

					assert.True(t, equalsBitSet(clonebs1, Xor(rb1, rb2)))

					//testing NOTAND
					clonebs1 = bs1.Clone()
					clonebs1.InPlaceDifference(bs2)

					assert.True(t, equalsBitSet(clonebs1, AndNot(rb1, rb2)))
				}
			}
		})
	}
}

// FlipRange flips [start, end) in a reference bitset, one bit at a time.
func FlipRange(start, end int, bs *bitset.BitSet) {
	for i := start; i < end; i++ {
		bs.Flip(uint(i))
	}
}

func TestBitmap(t *testing.T) {
	t.Run("Test Contains", func(t *testing.T) {
		rbm1 := NewBitmap()
		for k := 0; k < 1000; k++ {
			rbm1.AddInt(17 * k)
		}
		for k := 0; k < 17*1000; k++ {
			assert.Equal(t, (k/17*17 == k), rbm1.ContainsInt(k))
		}
	})

	t.Run("Test Clone", func(t *testing.T) {
		rb1 := NewBitmap()
		rb1.Add(10)

		rb2 := rb1.Clone()
		rb2.Remove(10)

		// removing from the clone must not affect the original
		assert.True(t, rb1.Contains(10))
	})

	t.Run("Test run array not equal", func(t *testing.T) {
		rb := NewBitmap()
		rb2 := NewBitmap()
		rb.AddRange(0, 1<<16)
		for i := 0; i < 10; i++ {
			rb2.AddInt(i)
		}

		assert.EqualValues(t, 1<<16, rb.GetCardinality())
		assert.EqualValues(t, 10, rb2.GetCardinality())
		assert.False(t, rb.Equals(rb2))

		rb.RunOptimize()
		rb2.RunOptimize()

		assert.EqualValues(t, 1<<16, rb.GetCardinality())
		assert.EqualValues(t, 10, rb2.GetCardinality())
		assert.False(t,
rb.Equals(rb2)) }) t.Run("Test ANDNOT4", func(t *testing.T) { rb := NewBitmap() rb2 := NewBitmap() for i := 0; i < 200000; i += 4 { rb2.AddInt(i) } for i := 200000; i < 400000; i += 14 { rb2.AddInt(i) } off := AndNot(rb2, rb) andNotresult := AndNot(rb, rb2) assert.True(t, rb.Equals(andNotresult)) assert.True(t, rb2.Equals(off)) rb2.AndNot(rb) assert.True(t, rb2.Equals(off)) }) t.Run("Test AND", func(t *testing.T) { rr := NewBitmap() for k := 0; k < 4000; k++ { rr.AddInt(k) } rr.Add(100000) rr.Add(110000) rr2 := NewBitmap() rr2.Add(13) rrand := And(rr, rr2) array := rrand.ToArray() assert.Equal(t, 1, len(array)) assert.EqualValues(t, 13, array[0]) rr.And(rr2) array = rr.ToArray() assert.Equal(t, 1, len(array)) assert.EqualValues(t, 13, array[0]) }) t.Run("Test AND 2", func(t *testing.T) { rr := NewBitmap() for k := 4000; k < 4256; k++ { rr.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr.AddInt(k) } for k := 3 * 65536; k < 3*65536+9000; k++ { rr.AddInt(k) } for k := 4 * 65535; k < 4*65535+7000; k++ { rr.AddInt(k) } for k := 6 * 65535; k < 6*65535+10000; k++ { rr.AddInt(k) } for k := 8 * 65535; k < 8*65535+1000; k++ { rr.AddInt(k) } for k := 9 * 65535; k < 9*65535+30000; k++ { rr.AddInt(k) } rr2 := NewBitmap() for k := 4000; k < 4256; k++ { rr2.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr2.AddInt(k) } for k := 3*65536 + 2000; k < 3*65536+6000; k++ { rr2.AddInt(k) } for k := 6 * 65535; k < 6*65535+1000; k++ { rr2.AddInt(k) } for k := 7 * 65535; k < 7*65535+1000; k++ { rr2.AddInt(k) } for k := 10 * 65535; k < 10*65535+5000; k++ { rr2.AddInt(k) } correct := And(rr, rr2) rr.And(rr2) assert.True(t, correct.Equals(rr)) }) t.Run("Test AND 2", func(t *testing.T) { rr := NewBitmap() for k := 0; k < 4000; k++ { rr.AddInt(k) } rr.AddInt(100000) rr.AddInt(110000) rr2 := NewBitmap() rr2.AddInt(13) rrand := And(rr, rr2) array := rrand.ToArray() assert.Equal(t, 1, len(array)) assert.EqualValues(t, 13, array[0]) }) t.Run("Test AND 3a", func(t *testing.T) { rr := 
NewBitmap() rr2 := NewBitmap() for k := 6 * 65536; k < 6*65536+10000; k++ { rr.AddInt(k) } for k := 6 * 65536; k < 6*65536+1000; k++ { rr2.AddInt(k) } result := And(rr, rr2) assert.EqualValues(t, 1000, result.GetCardinality()) }) t.Run("Test AND 3", func(t *testing.T) { var arrayand [11256]uint32 //393,216 pos := 0 rr := NewBitmap() for k := 4000; k < 4256; k++ { rr.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr.AddInt(k) } for k := 3 * 65536; k < 3*65536+1000; k++ { rr.AddInt(k) } for k := 3*65536 + 1000; k < 3*65536+7000; k++ { rr.AddInt(k) } for k := 3*65536 + 7000; k < 3*65536+9000; k++ { rr.AddInt(k) } for k := 4 * 65536; k < 4*65536+7000; k++ { rr.AddInt(k) } for k := 8 * 65536; k < 8*65536+1000; k++ { rr.AddInt(k) } for k := 9 * 65536; k < 9*65536+30000; k++ { rr.AddInt(k) } rr2 := NewBitmap() for k := 4000; k < 4256; k++ { rr2.AddInt(k) arrayand[pos] = uint32(k) pos++ } for k := 65536; k < 65536+4000; k++ { rr2.AddInt(k) arrayand[pos] = uint32(k) pos++ } for k := 3*65536 + 1000; k < 3*65536+7000; k++ { rr2.AddInt(k) arrayand[pos] = uint32(k) pos++ } for k := 6 * 65536; k < 6*65536+10000; k++ { rr.AddInt(k) } for k := 6 * 65536; k < 6*65536+1000; k++ { rr2.AddInt(k) arrayand[pos] = uint32(k) pos++ } for k := 7 * 65536; k < 7*65536+1000; k++ { rr2.AddInt(k) } for k := 10 * 65536; k < 10*65536+5000; k++ { rr2.AddInt(k) } rrand := And(rr, rr2) arrayres := rrand.ToArray() ok := true for i := range arrayres { if i < len(arrayand) { if arrayres[i] != arrayand[i] { log.Println(i, arrayres[i], arrayand[i]) ok = false } } else { log.Println('x', arrayres[i]) ok = false } } assert.Equal(t, len(arrayres), len(arrayand)) assert.True(t, ok) }) t.Run("Test AND 4", func(t *testing.T) { rb := NewBitmap() rb2 := NewBitmap() for i := 0; i < 200000; i += 4 { rb2.AddInt(i) } for i := 200000; i < 400000; i += 14 { rb2.AddInt(i) } //TODO: Bitmap.And(bm,bm2) andresult := And(rb, rb2) off := And(rb2, rb) assert.True(t, andresult.Equals(off)) assert.EqualValues(t, 0, 
andresult.GetCardinality()) for i := 500000; i < 600000; i += 14 { rb.AddInt(i) } for i := 200000; i < 400000; i += 3 { rb2.AddInt(i) } andresult2 := And(rb, rb2) assert.EqualValues(t, 0, andresult.GetCardinality()) assert.EqualValues(t, 0, andresult2.GetCardinality()) for i := 0; i < 200000; i += 4 { rb.AddInt(i) } for i := 200000; i < 400000; i += 14 { rb.AddInt(i) } assert.EqualValues(t, 0, andresult.GetCardinality()) rc := And(rb, rb2) rb.And(rb2) assert.Equal(t, rb.GetCardinality(), rc.GetCardinality()) }) t.Run("ArrayContainerCardinalityTest", func(t *testing.T) { ac := newArrayContainer() for k := uint16(0); k < 100; k++ { ac.iadd(k) assert.EqualValues(t, k+1, ac.getCardinality()) } for k := uint16(0); k < 100; k++ { ac.iadd(k) assert.EqualValues(t, 100, ac.getCardinality()) } }) t.Run("or test", func(t *testing.T) { rr := NewBitmap() for k := 0; k < 4000; k++ { rr.AddInt(k) } rr2 := NewBitmap() for k := 4000; k < 8000; k++ { rr2.AddInt(k) } result := Or(rr, rr2) assert.Equal(t, rr.GetCardinality()+rr2.GetCardinality(), result.GetCardinality()) }) t.Run("basic test", func(t *testing.T) { rr := NewBitmap() var a [4002]uint32 pos := 0 for k := 0; k < 4000; k++ { rr.AddInt(k) a[pos] = uint32(k) pos++ } rr.AddInt(100000) a[pos] = 100000 pos++ rr.AddInt(110000) a[pos] = 110000 pos++ array := rr.ToArray() ok := true for i := range a { if array[i] != a[i] { log.Println("rr : ", array[i], " a : ", a[i]) ok = false } } assert.Equal(t, len(a), len(array)) assert.True(t, ok) }) t.Run("BitmapContainerCardinalityTest", func(t *testing.T) { ac := newBitmapContainer() for k := uint16(0); k < 100; k++ { ac.iadd(k) assert.EqualValues(t, k+1, ac.getCardinality()) } for k := uint16(0); k < 100; k++ { ac.iadd(k) assert.EqualValues(t, 100, ac.getCardinality()) } }) t.Run("BitmapContainerTest", func(t *testing.T) { rr := newBitmapContainer() rr.iadd(uint16(110)) rr.iadd(uint16(114)) rr.iadd(uint16(115)) var array [3]uint16 pos := 0 for itr := rr.getShortIterator(); itr.hasNext(); 
{ array[pos] = itr.next() pos++ } assert.EqualValues(t, 110, array[0]) assert.EqualValues(t, 114, array[1]) assert.EqualValues(t, 115, array[2]) }) t.Run("cardinality test", func(t *testing.T) { N := 1024 for gap := 7; gap < 100000; gap *= 10 { for offset := 2; offset <= 1024; offset *= 2 { rb := NewBitmap() for k := 0; k < N; k++ { rb.AddInt(k * gap) assert.EqualValues(t, k+1, rb.GetCardinality()) } assert.EqualValues(t, N, rb.GetCardinality()) // check the add of existing values for k := 0; k < N; k++ { rb.AddInt(k * gap) assert.EqualValues(t, N, rb.GetCardinality()) } rb2 := NewBitmap() for k := 0; k < N; k++ { rb2.AddInt(k * gap * offset) assert.EqualValues(t, k+1, rb2.GetCardinality()) } assert.EqualValues(t, N, rb2.GetCardinality()) for k := 0; k < N; k++ { rb2.AddInt(k * gap * offset) assert.EqualValues(t, N, rb2.GetCardinality()) } assert.EqualValues(t, N/offset, And(rb, rb2).GetCardinality()) assert.EqualValues(t, 2*N-2*N/offset, Xor(rb, rb2).GetCardinality()) assert.EqualValues(t, 2*N-N/offset, Or(rb, rb2).GetCardinality()) } } }) t.Run("clear test", func(t *testing.T) { rb := NewBitmap() for i := 0; i < 200000; i += 7 { // dense rb.AddInt(i) } for i := 200000; i < 400000; i += 177 { // sparse rb.AddInt(i) } rb2 := NewBitmap() rb3 := NewBitmap() for i := 0; i < 200000; i += 4 { rb2.AddInt(i) } for i := 200000; i < 400000; i += 14 { rb2.AddInt(i) } rb.Clear() assert.EqualValues(t, 0, rb.GetCardinality()) assert.NotEqual(t, 0, rb2.GetCardinality()) rb.AddInt(4) rb3.AddInt(4) andresult := And(rb, rb2) orresult := Or(rb, rb2) assert.EqualValues(t, 1, andresult.GetCardinality()) assert.Equal(t, rb2.GetCardinality(), orresult.GetCardinality()) for i := 0; i < 200000; i += 4 { rb.AddInt(i) rb3.AddInt(i) } for i := 200000; i < 400000; i += 114 { rb.AddInt(i) rb3.AddInt(i) } arrayrr := rb.ToArray() arrayrr3 := rb3.ToArray() ok := true for i := range arrayrr { if arrayrr[i] != arrayrr3[i] { ok = false } } assert.Equal(t, len(arrayrr3), len(arrayrr)) assert.True(t, 
ok) }) t.Run("constainer factory ", func(t *testing.T) { bc1 := newBitmapContainer() bc2 := newBitmapContainer() bc3 := newBitmapContainer() ac1 := newArrayContainer() ac2 := newArrayContainer() ac3 := newArrayContainer() for i := 0; i < 5000; i++ { bc1.iadd(uint16(i * 70)) } for i := 0; i < 5000; i++ { bc2.iadd(uint16(i * 70)) } for i := 0; i < 5000; i++ { bc3.iadd(uint16(i * 70)) } for i := 0; i < 4000; i++ { ac1.iadd(uint16(i * 50)) } for i := 0; i < 4000; i++ { ac2.iadd(uint16(i * 50)) } for i := 0; i < 4000; i++ { ac3.iadd(uint16(i * 50)) } rbc := ac1.clone().(*arrayContainer).toBitmapContainer() assert.True(t, validate(rbc, ac1)) rbc = ac2.clone().(*arrayContainer).toBitmapContainer() assert.True(t, validate(rbc, ac2)) rbc = ac3.clone().(*arrayContainer).toBitmapContainer() assert.True(t, validate(rbc, ac3)) }) t.Run("flipTest1 ", func(t *testing.T) { rb := NewBitmap() rb.Flip(100000, 200000) // in-place on empty bitmap rbcard := rb.GetCardinality() assert.EqualValues(t, 100000, rbcard) bs := bitset.New(20000 - 10000) for i := uint(100000); i < 200000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTest1A", func(t *testing.T) { rb := NewBitmap() rb1 := Flip(rb, 100000, 200000) rbcard := rb1.GetCardinality() assert.EqualValues(t, 100000, rbcard) assert.EqualValues(t, 0, rb.GetCardinality()) bs := bitset.New(0) assert.True(t, equalsBitSet(bs, rb)) for i := uint(100000); i < 200000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb1)) }) t.Run("flipTest2", func(t *testing.T) { rb := NewBitmap() rb.Flip(100000, 100000) rbcard := rb.GetCardinality() assert.EqualValues(t, 0, rbcard) bs := bitset.New(0) assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTest2A", func(t *testing.T) { rb := NewBitmap() rb1 := Flip(rb, 100000, 100000) rb.AddInt(1) rbcard := rb1.GetCardinality() assert.EqualValues(t, 0, rbcard) assert.EqualValues(t, 1, rb.GetCardinality()) bs := bitset.New(0) assert.True(t, equalsBitSet(bs, rb1)) bs.Set(1) assert.True(t, 
equalsBitSet(bs, rb)) }) t.Run("flipTest3A", func(t *testing.T) { rb := NewBitmap() rb.Flip(100000, 200000) // got 100k-199999 rb.Flip(100000, 199991) // give back 100k-199990 rbcard := rb.GetCardinality() assert.EqualValues(t, 9, rbcard) bs := bitset.New(0) for i := uint(199991); i < 200000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTest4A", func(t *testing.T) { // fits evenly on both ends rb := NewBitmap() rb.Flip(100000, 200000) // got 100k-199999 rb.Flip(65536, 4*65536) rbcard := rb.GetCardinality() // 65536 to 99999 are 1s // 200000 to 262143 are 1s: total card assert.EqualValues(t, 96608, rbcard) bs := bitset.New(0) for i := uint(65536); i < 100000; i++ { bs.Set(i) } for i := uint(200000); i < 262144; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTest5", func(t *testing.T) { // fits evenly on small end, multiple // containers rb := NewBitmap() rb.Flip(100000, 132000) rb.Flip(65536, 120000) rbcard := rb.GetCardinality() // 65536 to 99999 are 1s // 120000 to 131999 assert.EqualValues(t, 46464, rbcard) bs := bitset.New(0) for i := uint(65536); i < 100000; i++ { bs.Set(i) } for i := uint(120000); i < 132000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTest6", func(t *testing.T) { rb := NewBitmap() rb1 := Flip(rb, 100000, 132000) rb2 := Flip(rb1, 65536, 120000) //rbcard := rb2.GetCardinality() bs := bitset.New(0) for i := uint(65536); i < 100000; i++ { bs.Set(i) } for i := uint(120000); i < 132000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb2)) }) t.Run("flipTest6A", func(t *testing.T) { rb := NewBitmap() rb1 := Flip(rb, 100000, 132000) rb2 := Flip(rb1, 99000, 2*65536) rbcard := rb2.GetCardinality() assert.EqualValues(t, rbcard, 1928) bs := bitset.New(0) for i := uint(99000); i < 100000; i++ { bs.Set(i) } for i := uint(2 * 65536); i < 132000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb2)) }) t.Run("flipTest7", func(t *testing.T) { // within 1 word, first container rb := 
NewBitmap() rb.Flip(650, 132000) rb.Flip(648, 651) rbcard := rb.GetCardinality() // 648, 649, 651-131999 assert.EqualValues(t, rbcard, 132000-651+2) bs := bitset.New(0) bs.Set(648) bs.Set(649) for i := uint(651); i < 132000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTestBig", func(t *testing.T) { numCases := 1000 rb := NewBitmap() bs := bitset.New(0) //Random r = new Random(3333); checkTime := 2.0 for i := 0; i < numCases; i++ { start := rand.Intn(65536 * 20) end := rand.Intn(65536 * 20) if rand.Float64() < float64(0.1) { end = start + rand.Intn(100) } rb.Flip(uint64(start), uint64(end)) if start < end { FlipRange(start, end, bs) // throws exception } // otherwise // insert some more ANDs to keep things sparser if rand.Float64() < 0.2 { mask := NewBitmap() mask1 := bitset.New(0) startM := rand.Intn(65536 * 20) endM := startM + 100000 mask.Flip(uint64(startM), uint64(endM)) FlipRange(startM, endM, mask1) mask.Flip(0, 65536*20+100000) FlipRange(0, 65536*20+100000, mask1) rb.And(mask) bs.InPlaceIntersection(mask1) } // see if we can detect incorrectly shared containers if rand.Float64() < 0.1 { irrelevant := Flip(rb, 10, 100000) irrelevant.Flip(5, 200000) irrelevant.Flip(190000, 260000) } if float64(i) > checkTime { assert.True(t, equalsBitSet(bs, rb)) checkTime *= 1.5 } } }) t.Run("ortest", func(t *testing.T) { rr := NewBitmap() for k := 0; k < 4000; k++ { rr.AddInt(k) } rr.AddInt(100000) rr.AddInt(110000) rr2 := NewBitmap() for k := 0; k < 4000; k++ { rr2.AddInt(k) } rror := Or(rr, rr2) array := rror.ToArray() rr.Or(rr2) arrayirr := rr.ToArray() assert.True(t, IntsEquals(array, arrayirr)) }) t.Run("ORtest", func(t *testing.T) { rr := NewBitmap() for k := 4000; k < 4256; k++ { rr.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr.AddInt(k) } for k := 3 * 65536; k < 3*65536+9000; k++ { rr.AddInt(k) } for k := 4 * 65535; k < 4*65535+7000; k++ { rr.AddInt(k) } for k := 6 * 65535; k < 6*65535+10000; k++ { rr.AddInt(k) } for k := 8 * 65535; k 
< 8*65535+1000; k++ { rr.AddInt(k) } for k := 9 * 65535; k < 9*65535+30000; k++ { rr.AddInt(k) } rr2 := NewBitmap() for k := 4000; k < 4256; k++ { rr2.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr2.AddInt(k) } for k := 3*65536 + 2000; k < 3*65536+6000; k++ { rr2.AddInt(k) } for k := 6 * 65535; k < 6*65535+1000; k++ { rr2.AddInt(k) } for k := 7 * 65535; k < 7*65535+1000; k++ { rr2.AddInt(k) } for k := 10 * 65535; k < 10*65535+5000; k++ { rr2.AddInt(k) } correct := Or(rr, rr2) rr.Or(rr2) assert.True(t, correct.Equals(rr)) }) t.Run("ortest2", func(t *testing.T) { arrayrr := make([]uint32, 4000+4000+2) pos := 0 rr := NewBitmap() for k := 0; k < 4000; k++ { rr.AddInt(k) arrayrr[pos] = uint32(k) pos++ } rr.AddInt(100000) rr.AddInt(110000) rr2 := NewBitmap() for k := 4000; k < 8000; k++ { rr2.AddInt(k) arrayrr[pos] = uint32(k) pos++ } arrayrr[pos] = 100000 pos++ arrayrr[pos] = 110000 pos++ rror := Or(rr, rr2) arrayor := rror.ToArray() assert.True(t, IntsEquals(arrayor, arrayrr)) }) t.Run("ortest3", func(t *testing.T) { V1 := make(map[int]bool) V2 := make(map[int]bool) rr := NewBitmap() rr2 := NewBitmap() for k := 0; k < 4000; k++ { rr2.AddInt(k) V1[k] = true } for k := 3500; k < 4500; k++ { rr.AddInt(k) V1[k] = true } for k := 4000; k < 65000; k++ { rr2.AddInt(k) V1[k] = true } // In the second node of each roaring bitmap, we have two bitmap // containers. // So, we will check the union between two BitmapContainers for k := 65536; k < 65536+10000; k++ { rr.AddInt(k) V1[k] = true } for k := 65536; k < 65536+14000; k++ { rr2.AddInt(k) V1[k] = true } // In the 3rd node of each Roaring Bitmap, we have an // ArrayContainer, so, we will try the union between two // ArrayContainers. 
for k := 4 * 65535; k < 4*65535+1000; k++ { rr.AddInt(k) V1[k] = true } for k := 4 * 65535; k < 4*65535+800; k++ { rr2.AddInt(k) V1[k] = true } // For the rest, we will check if the union will take them in // the result for k := 6 * 65535; k < 6*65535+1000; k++ { rr.AddInt(k) V1[k] = true } for k := 7 * 65535; k < 7*65535+2000; k++ { rr2.AddInt(k) V1[k] = true } rror := Or(rr, rr2) valide := true for _, k := range rror.ToArray() { _, found := V1[int(k)] if !found { valide = false } V2[int(k)] = true } for k := range V1 { _, found := V2[k] if !found { valide = false } } assert.True(t, valide) }) t.Run("ortest4", func(t *testing.T) { rb := NewBitmap() rb2 := NewBitmap() for i := 0; i < 200000; i += 4 { rb2.AddInt(i) } for i := 200000; i < 400000; i += 14 { rb2.AddInt(i) } rb2card := rb2.GetCardinality() // check or against an empty bitmap orresult := Or(rb, rb2) off := Or(rb2, rb) assert.True(t, orresult.Equals(off)) assert.Equal(t, orresult.GetCardinality(), rb2card) for i := 500000; i < 600000; i += 14 { rb.AddInt(i) } for i := 200000; i < 400000; i += 3 { rb2.AddInt(i) } // check or against an empty bitmap orresult2 := Or(rb, rb2) assert.Equal(t, orresult.GetCardinality(), rb2card) assert.Equal(t, rb2.GetCardinality()+rb.GetCardinality(), orresult2.GetCardinality()) rb.Or(rb2) assert.True(t, rb.Equals(orresult2)) }) t.Run("randomTest", func(t *testing.T) { rTest(t, 15) rTest(t, 1024) rTest(t, 4096) rTest(t, 65536) rTest(t, 65536*16) }) t.Run("SimpleCardinality", func(t *testing.T) { N := 512 gap := 70 rb := NewBitmap() for k := 0; k < N; k++ { rb.AddInt(k * gap) assert.EqualValues(t, k+1, rb.GetCardinality()) } assert.EqualValues(t, N, rb.GetCardinality()) for k := 0; k < N; k++ { rb.AddInt(k * gap) assert.EqualValues(t, N, rb.GetCardinality()) } }) t.Run("XORtest", func(t *testing.T) { rr := NewBitmap() for k := 4000; k < 4256; k++ { rr.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr.AddInt(k) } for k := 3 * 65536; k < 3*65536+9000; k++ { rr.AddInt(k) } for 
k := 4 * 65535; k < 4*65535+7000; k++ { rr.AddInt(k) } for k := 6 * 65535; k < 6*65535+10000; k++ { rr.AddInt(k) } for k := 8 * 65535; k < 8*65535+1000; k++ { rr.AddInt(k) } for k := 9 * 65535; k < 9*65535+30000; k++ { rr.AddInt(k) } rr2 := NewBitmap() for k := 4000; k < 4256; k++ { rr2.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr2.AddInt(k) } for k := 3*65536 + 2000; k < 3*65536+6000; k++ { rr2.AddInt(k) } for k := 6 * 65535; k < 6*65535+1000; k++ { rr2.AddInt(k) } for k := 7 * 65535; k < 7*65535+1000; k++ { rr2.AddInt(k) } for k := 10 * 65535; k < 10*65535+5000; k++ { rr2.AddInt(k) } correct := Xor(rr, rr2) rr.Xor(rr2) assert.True(t, correct.Equals(rr)) }) t.Run("xortest1", func(t *testing.T) { V1 := make(map[int]bool) V2 := make(map[int]bool) rr := NewBitmap() rr2 := NewBitmap() // For the first 65536: rr2 has a bitmap container, and rr has // an array container. // We will check the union between a BitmapCintainer and an // arrayContainer for k := 0; k < 4000; k++ { rr2.AddInt(k) if k < 3500 { V1[k] = true } } for k := 3500; k < 4500; k++ { rr.AddInt(k) } for k := 4000; k < 65000; k++ { rr2.AddInt(k) if k >= 4500 { V1[k] = true } } for k := 65536; k < 65536+30000; k++ { rr.AddInt(k) } for k := 65536; k < 65536+50000; k++ { rr2.AddInt(k) if k >= 65536+30000 { V1[k] = true } } // In the 3rd node of each Roaring Bitmap, we have an // ArrayContainer. So, we will try the union between two // ArrayContainers. 
for k := 4 * 65535; k < 4*65535+1000; k++ { rr.AddInt(k) if k >= (4*65535 + 800) { V1[k] = true } } for k := 4 * 65535; k < 4*65535+800; k++ { rr2.AddInt(k) } for k := 6 * 65535; k < 6*65535+1000; k++ { rr.AddInt(k) V1[k] = true } for k := 7 * 65535; k < 7*65535+2000; k++ { rr2.AddInt(k) V1[k] = true } rrxor := Xor(rr, rr2) valide := true for _, i := range rrxor.ToArray() { _, found := V1[int(i)] if !found { valide = false } V2[int(i)] = true } for k := range V1 { _, found := V2[k] if !found { valide = false } } assert.True(t, valide) }) } func TestXORtest4(t *testing.T) { t.Run("XORtest 4", func(t *testing.T) { rb := NewBitmap() rb2 := NewBitmap() counter := 0 for i := 0; i < 200000; i += 4 { rb2.AddInt(i) counter++ } assert.EqualValues(t, counter, rb2.GetCardinality()) for i := 200000; i < 400000; i += 14 { rb2.AddInt(i) counter++ } assert.EqualValues(t, counter, rb2.GetCardinality()) rb2card := rb2.GetCardinality() assert.EqualValues(t, counter, rb2card) // check or against an empty bitmap xorresult := Xor(rb, rb2) assert.EqualValues(t, counter, xorresult.GetCardinality()) off := Or(rb2, rb) assert.EqualValues(t, counter, off.GetCardinality()) assert.True(t, xorresult.Equals(off)) assert.Equal(t, xorresult.GetCardinality(), rb2card) for i := 500000; i < 600000; i += 14 { rb.AddInt(i) } for i := 200000; i < 400000; i += 3 { rb2.AddInt(i) } // check or against an empty bitmap xorresult2 := Xor(rb, rb2) assert.EqualValues(t, xorresult.GetCardinality(), rb2card) assert.Equal(t, xorresult2.GetCardinality(), rb2.GetCardinality()+rb.GetCardinality()) rb.Xor(rb2) assert.True(t, xorresult2.Equals(rb)) }) //need to add the massives } func TestNextMany(t *testing.T) { count := 70000 for _, gap := range []uint32{1, 8, 32, 128} { expected := make([]uint32, count) { v := uint32(0) for i := range expected { expected[i] = v v += gap } } bm := BitmapOf(expected...) 
for _, bufSize := range []int{1, 64, 4096, count} { buf := make([]uint32, bufSize) it := bm.ManyIterator() cur := 0 for n := it.NextMany(buf); n != 0; n = it.NextMany(buf) { // much faster tests... (10s -> 5ms) if cur+n > count { assert.LessOrEqual(t, count, cur+n) } for i, v := range buf[:n] { // much faster tests... if v != expected[cur+i] { assert.Equal(t, expected[cur+i], v) } } cur += n } assert.Equal(t, count, cur) } } } func TestBigRandom(t *testing.T) { rTest(t, 15) rTest(t, 100) rTest(t, 512) rTest(t, 1023) rTest(t, 1025) rTest(t, 4095) rTest(t, 4096) rTest(t, 4097) rTest(t, 65536) rTest(t, 65536*16) } func rTest(t *testing.T, N int) { log.Println("rtest N=", N) for gap := 1; gap <= 65536; gap *= 2 { bs1 := bitset.New(0) rb1 := NewBitmap() for x := 0; x <= N; x += gap { bs1.Set(uint(x)) rb1.AddInt(x) } assert.EqualValues(t, rb1.GetCardinality(), bs1.Count()) assert.True(t, equalsBitSet(bs1, rb1)) for offset := 1; offset <= gap; offset *= 2 { bs2 := bitset.New(0) rb2 := NewBitmap() for x := 0; x <= N; x += gap { bs2.Set(uint(x + offset)) rb2.AddInt(x + offset) } assert.EqualValues(t, rb2.GetCardinality(), bs2.Count()) assert.True(t, equalsBitSet(bs2, rb2)) clonebs1 := bs1.Clone() clonebs1.InPlaceIntersection(bs2) if !equalsBitSet(clonebs1, And(rb1, rb2)) { v := rb1.Clone() v.And(rb2) assert.True(t, equalsBitSet(clonebs1, v)) } // testing OR clonebs1 = bs1.Clone() clonebs1.InPlaceUnion(bs2) assert.True(t, equalsBitSet(clonebs1, Or(rb1, rb2))) // testing XOR clonebs1 = bs1.Clone() clonebs1.InPlaceSymmetricDifference(bs2) assert.True(t, equalsBitSet(clonebs1, Xor(rb1, rb2))) //testing NOTAND clonebs1 = bs1.Clone() clonebs1.InPlaceDifference(bs2) assert.True(t, equalsBitSet(clonebs1, AndNot(rb1, rb2))) } } } func equalsBitSet(a *bitset.BitSet, b *Bitmap) bool { for i, e := a.NextSet(0); e; i, e = a.NextSet(i + 1) { if !b.ContainsInt(int(i)) { return false } } i := b.Iterator() for i.HasNext() { if !a.Test(uint(i.Next())) { return false } } return true } func 
equalsArray(a []int, b *Bitmap) bool { if uint64(len(a)) != b.GetCardinality() { return false } for _, x := range a { if !b.ContainsInt(x) { return false } } return true } func IntsEquals(a, b []uint32) bool { if len(a) != len(b) { return false } for i, v := range a { if v != b[i] { return false } } return true } func validate(bc *bitmapContainer, ac *arrayContainer) bool { // Checking the cardinalities of each container if bc.getCardinality() != ac.getCardinality() { log.Println("cardinality differs") return false } // Checking that the two containers contain the same values counter := 0 for i := bc.NextSetBit(0); i >= 0; i = bc.NextSetBit(i + 1) { counter++ if !ac.contains(uint16(i)) { log.Println("content differs") log.Println(bc) log.Println(ac) return false } } // checking the cardinality of the BitmapContainer return counter == bc.getCardinality() } func TestRoaringArray(t *testing.T) { a := newRoaringArray() t.Run("Test Init", func(t *testing.T) { assert.Equal(t, 0, a.size()) }) t.Run("Test Insert", func(t *testing.T) { a.appendContainer(0, newArrayContainer(), false) assert.Equal(t, 1, a.size()) }) t.Run("Test Remove", func(t *testing.T) { a.remove(0) assert.Equal(t, 0, a.size()) }) t.Run("Test popcount Full", func(t *testing.T) { res := popcount(uint64(0xffffffffffffffff)) assert.EqualValues(t, 64, res) }) t.Run("Test popcount Empty", func(t *testing.T) { res := popcount(0) assert.EqualValues(t, 0, res) }) t.Run("Test popcount 16", func(t *testing.T) { res := popcount(0xff00ff) assert.EqualValues(t, 16, res) }) t.Run("Test ArrayContainer Add", func(t *testing.T) { ar := newArrayContainer() ar.iadd(1) assert.EqualValues(t, 1, ar.getCardinality()) }) t.Run("Test ArrayContainer Add wacky", func(t *testing.T) { ar := newArrayContainer() ar.iadd(0) ar.iadd(5000) assert.EqualValues(t, 2, ar.getCardinality()) }) t.Run("Test ArrayContainer Add Reverse", func(t *testing.T) { ar := newArrayContainer() ar.iadd(5000) ar.iadd(2048) ar.iadd(0) assert.EqualValues(t, 3, 
ar.getCardinality()) }) t.Run("Test BitmapContainer Add ", func(t *testing.T) { bm := newBitmapContainer() bm.iadd(0) assert.EqualValues(t, 1, bm.getCardinality()) }) } func TestFlipBigA(t *testing.T) { numCases := 1000 bs := bitset.New(0) checkTime := 2.0 rb1 := NewBitmap() rb2 := NewBitmap() for i := 0; i < numCases; i++ { start := rand.Intn(65536 * 20) end := rand.Intn(65536 * 20) if rand.Float64() < 0.1 { end = start + rand.Intn(100) } if (i & 1) == 0 { rb2 = FlipInt(rb1, start, end) // tweak the other, catch bad sharing rb1.FlipInt(rand.Intn(65536*20), rand.Intn(65536*20)) } else { rb1 = FlipInt(rb2, start, end) rb2.FlipInt(rand.Intn(65536*20), rand.Intn(65536*20)) } if start < end { FlipRange(start, end, bs) // throws exception } // otherwise // insert some more ANDs to keep things sparser if (rand.Float64() < 0.2) && (i&1) == 0 { mask := NewBitmap() mask1 := bitset.New(0) startM := rand.Intn(65536 * 20) endM := startM + 100000 mask.FlipInt(startM, endM) FlipRange(startM, endM, mask1) mask.FlipInt(0, 65536*20+100000) FlipRange(0, 65536*20+100000, mask1) rb2.And(mask) bs.InPlaceIntersection(mask1) } if float64(i) > checkTime { var rb *Bitmap if (i & 1) == 0 { rb = rb2 } else { rb = rb1 } assert.True(t, equalsBitSet(bs, rb)) checkTime *= 1.5 } } } func TestNextManyOfAddRangeAcrossContainers(t *testing.T) { rb := NewBitmap() rb.AddRange(65530, 65540) expectedCard := 10 expected := []uint32{65530, 65531, 65532, 65533, 65534, 65535, 65536, 65537, 65538, 65539, 0} // test where all values can be returned in a single buffer it := rb.ManyIterator() buf := make([]uint32, 11) n := it.NextMany(buf) assert.Equal(t, expectedCard, n) for i, e := range expected { assert.Equal(t, e, buf[i]) } // test where buf is size 1, so many iterations it = rb.ManyIterator() n = 0 buf = make([]uint32, 1) for i := 0; i < expectedCard; i++ { n = it.NextMany(buf) assert.Equal(t, 1, n) assert.Equal(t, expected[i], buf[0]) } n = it.NextMany(buf) assert.Equal(t, 0, n) } func TestDoubleAdd(t 
*testing.T) { t.Run("doubleadd ", func(t *testing.T) { rb := NewBitmap() rb.AddRange(65533, 65536) rb.AddRange(65530, 65536) rb2 := NewBitmap() rb2.AddRange(65530, 65536) assert.True(t, rb.Equals(rb2)) rb2.RemoveRange(65530, 65536) assert.EqualValues(t, 0, rb2.GetCardinality()) }) t.Run("doubleadd2 ", func(t *testing.T) { rb := NewBitmap() rb.AddRange(65533, 65536*20) rb.AddRange(65530, 65536*20) rb2 := NewBitmap() rb2.AddRange(65530, 65536*20) assert.True(t, rb.Equals(rb2)) rb2.RemoveRange(65530, 65536*20) assert.EqualValues(t, 0, rb2.GetCardinality()) }) t.Run("doubleadd3 ", func(t *testing.T) { rb := NewBitmap() rb.AddRange(65533, 65536*20+10) rb.AddRange(65530, 65536*20+10) rb2 := NewBitmap() rb2.AddRange(65530, 65536*20+10) assert.True(t, rb.Equals(rb2)) rb2.RemoveRange(65530, 65536*20+1) assert.EqualValues(t, 9, rb2.GetCardinality()) }) t.Run("doubleadd4 ", func(t *testing.T) { rb := NewBitmap() rb.AddRange(65533, 65536*20) rb.RemoveRange(65533+5, 65536*20) assert.EqualValues(t, 5, rb.GetCardinality()) }) t.Run("doubleadd5 ", func(t *testing.T) { rb := NewBitmap() rb.AddRange(65533, 65536*20) rb.RemoveRange(65533+5, 65536*20-5) assert.EqualValues(t, 10, rb.GetCardinality()) }) t.Run("doubleadd6 ", func(t *testing.T) { rb := NewBitmap() rb.AddRange(65533, 65536*20-5) rb.RemoveRange(65533+5, 65536*20-10) assert.EqualValues(t, 10, rb.GetCardinality()) }) t.Run("doubleadd7 ", func(t *testing.T) { rb := NewBitmap() rb.AddRange(65533, 65536*20+1) rb.RemoveRange(65533+1, 65536*20) assert.EqualValues(t, 2, rb.GetCardinality()) }) t.Run("AndNotBug01 ", func(t *testing.T) { rb1 := NewBitmap() rb1.AddRange(0, 60000) rb2 := NewBitmap() rb2.AddRange(60000-10, 60000+10) rb2.AndNot(rb1) rb3 := NewBitmap() rb3.AddRange(60000, 60000+10) assert.True(t, rb2.Equals(rb3)) }) } func TestAndNot(t *testing.T) { rr := NewBitmap() for k := 4000; k < 4256; k++ { rr.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr.AddInt(k) } for k := 3 * 65536; k < 3*65536+9000; k++ { rr.AddInt(k) 
} for k := 4 * 65535; k < 4*65535+7000; k++ { rr.AddInt(k) } for k := 6 * 65535; k < 6*65535+10000; k++ { rr.AddInt(k) } for k := 8 * 65535; k < 8*65535+1000; k++ { rr.AddInt(k) } for k := 9 * 65535; k < 9*65535+30000; k++ { rr.AddInt(k) } rr2 := NewBitmap() for k := 4000; k < 4256; k++ { rr2.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr2.AddInt(k) } for k := 3*65536 + 2000; k < 3*65536+6000; k++ { rr2.AddInt(k) } for k := 6 * 65535; k < 6*65535+1000; k++ { rr2.AddInt(k) } for k := 7 * 65535; k < 7*65535+1000; k++ { rr2.AddInt(k) } for k := 10 * 65535; k < 10*65535+5000; k++ { rr2.AddInt(k) } correct := AndNot(rr, rr2) rr.AndNot(rr2) assert.True(t, correct.Equals(rr)) } func TestStats(t *testing.T) { t.Run("Test Stats with empty bitmap", func(t *testing.T) { expectedStats := Statistics{} rr := NewBitmap() assert.EqualValues(t, expectedStats, rr.Stats()) }) t.Run("Test Stats with bitmap Container", func(t *testing.T) { // Given a bitmap that should have a single bitmap container expectedStats := Statistics{ Cardinality: 60000, Containers: 1, BitmapContainers: 1, BitmapContainerValues: 60000, BitmapContainerBytes: 8192, RunContainers: 0, RunContainerBytes: 0, RunContainerValues: 0, } rr := NewBitmap() for i := uint32(0); i < 60000; i++ { rr.Add(i) } assert.EqualValues(t, expectedStats, rr.Stats()) }) t.Run("Test Stats with run Container", func(t *testing.T) { // Given that we should have a single run container intSize := int(unsafe.Sizeof(int(0))) var runContainerBytes uint64 if intSize == 4 { runContainerBytes = 40 } else { runContainerBytes = 52 } expectedStats := Statistics{ Cardinality: 60000, Containers: 1, BitmapContainers: 0, BitmapContainerValues: 0, BitmapContainerBytes: 0, RunContainers: 1, RunContainerBytes: runContainerBytes, RunContainerValues: 60000, } rr := NewBitmap() rr.AddRange(0, 60000) assert.EqualValues(t, expectedStats, rr.Stats()) }) t.Run("Test Stats with Array Container", func(t *testing.T) { // Given a bitmap that should have a single 
array container expectedStats := Statistics{ Cardinality: 2, Containers: 1, ArrayContainers: 1, ArrayContainerValues: 2, ArrayContainerBytes: 4, } rr := NewBitmap() rr.Add(2) rr.Add(4) assert.EqualValues(t, expectedStats, rr.Stats()) }) } func TestFlipVerySmall(t *testing.T) { rb := NewBitmap() rb.Flip(0, 10) // got [0,9], card is 10 rb.Flip(0, 1) // give back the number 0, card goes to 9 rbcard := rb.GetCardinality() assert.EqualValues(t, 9, rbcard) } func TestReverseIterator(t *testing.T) { t.Run("#1", func(t *testing.T) { values := []uint32{0, 2, 15, 16, 31, 32, 33, 9999, MaxUint16, MaxUint32} bm := New() for n := 0; n < len(values); n++ { bm.Add(values[n]) } i := bm.ReverseIterator() n := len(values) - 1 for i.HasNext() { assert.EqualValues(t, i.Next(), values[n]) n-- } // HasNext() was terminating early - add test i = bm.ReverseIterator() n = len(values) - 1 for ; n >= 0; n-- { assert.EqualValues(t, i.Next(), values[n]) assert.False(t, n > 0 && !i.HasNext()) } }) t.Run("#2", func(t *testing.T) { bm := New() i := bm.ReverseIterator() assert.False(t, i.HasNext()) }) t.Run("#3", func(t *testing.T) { bm := New() bm.AddInt(0) i := bm.ReverseIterator() assert.True(t, i.HasNext()) assert.EqualValues(t, 0, i.Next()) assert.False(t, i.HasNext()) }) t.Run("#4", func(t *testing.T) { bm := New() bm.AddInt(9999) i := bm.ReverseIterator() assert.True(t, i.HasNext()) assert.EqualValues(t, 9999, i.Next()) assert.False(t, i.HasNext()) }) t.Run("#5", func(t *testing.T) { bm := New() bm.AddInt(MaxUint16) i := bm.ReverseIterator() assert.True(t, i.HasNext()) assert.EqualValues(t, MaxUint16, i.Next()) assert.False(t, i.HasNext()) }) t.Run("#6", func(t *testing.T) { bm := New() bm.Add(MaxUint32) i := bm.ReverseIterator() assert.True(t, i.HasNext()) assert.EqualValues(t, uint32(MaxUint32), i.Next()) assert.False(t, i.HasNext()) }) } func TestIteratorPeekNext(t *testing.T) { values := []uint32{0, 2, 15, 16, 31, 32, 33, 9999, MaxUint16, MaxUint32} bm := New() for n := 0; n < 
len(values); n++ { bm.Add(values[n]) } i := bm.Iterator() assert.True(t, i.HasNext()) for i.HasNext() { assert.Equal(t, i.PeekNext(), i.Next()) } } func TestIteratorAdvance(t *testing.T) { values := []uint32{1, 2, 15, 16, 31, 32, 33, 9999, MaxUint16} bm := New() for n := 0; n < len(values); n++ { bm.Add(values[n]) } cases := []struct { minval uint32 expected uint32 }{ {0, 1}, {1, 1}, {2, 2}, {3, 15}, {30, 31}, {33, 33}, {9998, 9999}, {MaxUint16, MaxUint16}, } t.Run("advance by using a new int iterator", func(t *testing.T) { for _, c := range cases { i := bm.Iterator() i.AdvanceIfNeeded(c.minval) assert.True(t, i.HasNext()) assert.Equal(t, c.expected, i.PeekNext()) } }) t.Run("advance by using the same int iterator", func(t *testing.T) { i := bm.Iterator() for _, c := range cases { i.AdvanceIfNeeded(c.minval) assert.True(t, i.HasNext()) assert.Equal(t, c.expected, i.PeekNext()) } }) t.Run("advance out of a container value", func(t *testing.T) { i := bm.Iterator() i.AdvanceIfNeeded(MaxUint32) assert.False(t, i.HasNext()) i.AdvanceIfNeeded(MaxUint32) assert.False(t, i.HasNext()) }) t.Run("advance on a value that is less than the pointed value", func(t *testing.T) { i := bm.Iterator() i.AdvanceIfNeeded(29) assert.True(t, i.HasNext()) assert.EqualValues(t, 31, i.PeekNext()) i.AdvanceIfNeeded(13) assert.True(t, i.HasNext()) assert.EqualValues(t, 31, i.PeekNext()) }) } func TestPackageFlipMaxRangeEnd(t *testing.T) { var empty Bitmap flipped := Flip(&empty, 0, MaxRange) assert.EqualValues(t, MaxRange, flipped.GetCardinality()) } func TestBitmapFlipMaxRangeEnd(t *testing.T) { var bm Bitmap bm.Flip(0, MaxRange) assert.EqualValues(t, MaxRange, bm.GetCardinality()) } roaring-0.4.21/roaringarray.go 0000664 0000000 0000000 00000051557 13542657257 0016323 0 ustar 00root root 0000000 0000000 package roaring import ( "bytes" "encoding/binary" "fmt" snappy "github.com/glycerine/go-unsnap-stream" "github.com/tinylib/msgp/msgp" "io" ) //go:generate msgp -unexported type container 
interface { addOffset(uint16) []container clone() container and(container) container andCardinality(container) int iand(container) container // i stands for inplace andNot(container) container iandNot(container) container // i stands for inplace getCardinality() int // rank returns the number of integers that are // smaller or equal to x. rank(infinity) would be getCardinality(). rank(uint16) int iadd(x uint16) bool // inplace, returns true if x was new. iaddReturnMinimized(uint16) container // may change return type to minimize storage. //addRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused) iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx) iremove(x uint16) bool // inplace, returns true if x was present. iremoveReturnMinimized(uint16) container // may change return type to minimize storage. not(start, final int) container // range is [firstOfRange,lastOfRange) inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx) xor(r container) container getShortIterator() shortPeekable getReverseIterator() shortIterable getManyIterator() manyIterable contains(i uint16) bool maximum() uint16 minimum() uint16 // equals is now logical equals; it does not require the // same underlying container types, but compares across // any of the implementations. 
equals(r container) bool fillLeastSignificant16bits(array []uint32, i int, mask uint32) or(r container) container orCardinality(r container) int isFull() bool ior(r container) container // i stands for inplace intersects(r container) bool // whether the two containers intersect lazyOR(r container) container lazyIOR(r container) container getSizeInBytes() int //removeRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused) iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange) selectInt(x uint16) int // selectInt returns the xth integer in the container serializedSizeInBytes() int writeTo(io.Writer) (int, error) numberOfRuns() int toEfficientContainer() container String() string containerType() contype } type contype uint8 const ( bitmapContype contype = iota arrayContype run16Contype run32Contype ) // careful: range is [firstOfRange,lastOfRange] func rangeOfOnes(start, last int) container { if start > MaxUint16 { panic("rangeOfOnes called with start > MaxUint16") } if last > MaxUint16 { panic("rangeOfOnes called with last > MaxUint16") } if start < 0 { panic("rangeOfOnes called with start < 0") } if last < 0 { panic("rangeOfOnes called with last < 0") } return newRunContainer16Range(uint16(start), uint16(last)) } type roaringArray struct { keys []uint16 containers []container `msg:"-"` // don't try to serialize directly. needCopyOnWrite []bool copyOnWrite bool // conserz is used at serialization time // to serialize containers. Otherwise empty. conserz []containerSerz } // containerSerz facilitates serializing container (tricky to // serialize because it is an interface) by providing a // light wrapper with a type identifier. type containerSerz struct { t contype `msg:"t"` // type r msgp.Raw `msg:"r"` // Raw msgpack of the actual container type } func newRoaringArray() *roaringArray { return &roaringArray{} } // runOptimize compresses the element containers to minimize space consumed. 
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
// A: since we aren't changing the logical content, just the representation,
//    we don't bother to check the needCopyOnWrite bits. We replace
//    (possibly all) elements of ra.containers in-place with space
//    optimized versions.
func (ra *roaringArray) runOptimize() {
	for i := range ra.containers {
		ra.containers[i] = ra.containers[i].toEfficientContainer()
	}
}

// appendContainer appends (key, value) along with its copy-on-write flag.
func (ra *roaringArray) appendContainer(key uint16, value container, mustCopyOnWrite bool) {
	ra.keys = append(ra.keys, key)
	ra.containers = append(ra.containers, value)
	ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
}

// appendWithoutCopy appends the container at startingindex of sa by
// reference, inheriting sa's copy-on-write flag for that slot.
func (ra *roaringArray) appendWithoutCopy(sa roaringArray, startingindex int) {
	mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
	ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
}

// appendCopy appends the container at startingindex of sa, either cloning
// it eagerly or sharing it and flagging both sides copy-on-write.
func (ra *roaringArray) appendCopy(sa roaringArray, startingindex int) {
	// cow only if the two request it, or if we already have a lightweight copy
	copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
	if !copyonwrite {
		// since there is no copy-on-write, we need to clone the container (this is important)
		ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].clone(), copyonwrite)
	} else {
		ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite)
		if !sa.needsCopyOnWrite(startingindex) {
			sa.setNeedsCopyOnWrite(startingindex)
		}
	}
}

// appendWithoutCopyMany appends sa's containers in [startingindex,end) by reference.
func (ra *roaringArray) appendWithoutCopyMany(sa roaringArray, startingindex, end int) {
	for i := startingindex; i < end; i++ {
		ra.appendWithoutCopy(sa, i)
	}
}

// appendCopyMany appends copies of sa's containers in [startingindex,end).
func (ra *roaringArray) appendCopyMany(sa roaringArray, startingindex, end int) {
	for i := startingindex; i < end; i++ {
		ra.appendCopy(sa, i)
	}
}

// appendCopiesUntil appends (copies of) sa's containers whose keys are
// strictly smaller than stoppingKey.
func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
	// cow only if the two request it, or if we already have a lightweight copy
	copyonwrite := ra.copyOnWrite && sa.copyOnWrite

	for i := 0; i < sa.size(); i++ {
		if sa.keys[i] >= stoppingKey {
			break
		}
		thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
		if thiscopyonewrite {
			ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
			if !sa.needsCopyOnWrite(i) {
				sa.setNeedsCopyOnWrite(i)
			}
		} else {
			// since there is no copy-on-write, we need to clone the container (this is important)
			ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
		}
	}
}

// appendCopiesAfter appends (copies of) sa's containers whose keys are
// strictly greater than beforeStart.
func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
	// cow only if the two request it, or if we already have a lightweight copy
	copyonwrite := ra.copyOnWrite && sa.copyOnWrite

	startLocation := sa.getIndex(beforeStart)
	if startLocation >= 0 {
		startLocation++
	} else {
		// beforeStart absent: convert the encoded insertion point to an index.
		startLocation = -startLocation - 1
	}

	for i := startLocation; i < sa.size(); i++ {
		thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
		if thiscopyonewrite {
			ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
			if !sa.needsCopyOnWrite(i) {
				sa.setNeedsCopyOnWrite(i)
			}
		} else {
			// since there is no copy-on-write, we need to clone the container (this is important)
			ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
		}
	}
}

// removeIndexRange removes the entries at indices [begin,end).
func (ra *roaringArray) removeIndexRange(begin, end int) {
	if end <= begin {
		return
	}
	r := end - begin
	copy(ra.keys[begin:], ra.keys[end:])
	copy(ra.containers[begin:], ra.containers[end:])
	copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
	ra.resize(len(ra.keys) - r)
}

// resize truncates all three parallel slices to newsize, nil-ing the
// trailing container references so they can be garbage collected.
func (ra *roaringArray) resize(newsize int) {
	for k := newsize; k < len(ra.containers); k++ {
		ra.containers[k] = nil
	}
	ra.keys = ra.keys[:newsize]
	ra.containers = ra.containers[:newsize]
	ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
}

// clear empties the array and resets the copy-on-write state.
func (ra *roaringArray) clear() {
	ra.resize(0)
	ra.copyOnWrite = false
	ra.conserz = nil
}

// clone returns a copy of ra: a lightweight shared copy when copyOnWrite
// is set, a deep copy otherwise.
func (ra *roaringArray) clone() *roaringArray {
	sa := roaringArray{}
	sa.copyOnWrite = ra.copyOnWrite // this is where copyOnWrite is used.
if ra.copyOnWrite {
	// Lightweight copy: share the containers and flag BOTH arrays so any
	// future mutation clones first.
	sa.keys = make([]uint16, len(ra.keys))
	copy(sa.keys, ra.keys)
	sa.containers = make([]container, len(ra.containers))
	copy(sa.containers, ra.containers)
	sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
	ra.markAllAsNeedingCopyOnWrite()
	sa.markAllAsNeedingCopyOnWrite()
	// sa.needCopyOnWrite is shared
} else {
	// make a full copy
	sa.keys = make([]uint16, len(ra.keys))
	copy(sa.keys, ra.keys)
	sa.containers = make([]container, len(ra.containers))
	for i := range sa.containers {
		sa.containers[i] = ra.containers[i].clone()
	}
	sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
}
return &sa
}

// clone all containers which have needCopyOnWrite set to true
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to.
func (ra *roaringArray) cloneCopyOnWriteContainers() {
	for i, needCopyOnWrite := range ra.needCopyOnWrite {
		if needCopyOnWrite {
			ra.containers[i] = ra.containers[i].clone()
			ra.needCopyOnWrite[i] = false
		}
	}
}

// unused function:
//func (ra *roaringArray) containsKey(x uint16) bool {
//	return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
//}

// getContainer returns the container stored under key x, or nil if absent.
func (ra *roaringArray) getContainer(x uint16) container {
	i := ra.binarySearch(0, int64(len(ra.keys)), x)
	if i < 0 {
		return nil
	}
	return ra.containers[i]
}

func (ra *roaringArray) getContainerAtIndex(i int) container {
	return ra.containers[i]
}

// getFastContainerAtIndex returns the container at index i converted to a
// bitmap container where that helps (array, or non-full run container);
// when needsWriteable is set, a copy-on-write bitmap container is cloned.
func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container {
	c := ra.getContainerAtIndex(i)
	switch t := c.(type) {
	case *arrayContainer:
		c = t.toBitmapContainer()
	case *runContainer16:
		if !t.isFull() {
			c = t.toBitmapContainer()
		}
	case *bitmapContainer:
		if needsWriteable && ra.needCopyOnWrite[i] {
			c = ra.containers[i].clone()
		}
	}
	return c
}

// getWritableContainerAtIndex returns the container at index i, cloning it
// first if it is flagged copy-on-write, so the caller may mutate it.
func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
	if ra.needCopyOnWrite[i] {
		ra.containers[i] = ra.containers[i].clone()
		ra.needCopyOnWrite[i] = false
	}
	return ra.containers[i]
}

// getIndex returns the index of key x, or a negative encoding of the
// insertion point (see binarySearch) if x is absent.
func (ra *roaringArray) getIndex(x uint16) int {
	// before the binary search, we optimize for frequent cases
	size := len(ra.keys)
	if (size == 0) || (ra.keys[size-1] == x) {
		return size - 1
	}
	return ra.binarySearch(0, int64(size), x)
}

func (ra *roaringArray) getKeyAtIndex(i int) uint16 {
	return ra.keys[i]
}

// insertNewKeyValueAt inserts (key, value) at index i, shifting later
// entries one slot to the right.
func (ra *roaringArray) insertNewKeyValueAt(i int, key uint16, value container) {
	ra.keys = append(ra.keys, 0)
	ra.containers = append(ra.containers, nil)

	copy(ra.keys[i+1:], ra.keys[i:])
	copy(ra.containers[i+1:], ra.containers[i:])

	ra.keys[i] = key
	ra.containers[i] = value

	ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
	copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
	ra.needCopyOnWrite[i] = false
}

// remove deletes the entry with the given key; it reports whether the key
// was present.
func (ra *roaringArray) remove(key uint16) bool {
	i := ra.binarySearch(0, int64(len(ra.keys)), key)
	if i >= 0 { // key found
		ra.removeAtIndex(i)
		return true
	}
	return false
}

func (ra *roaringArray) removeAtIndex(i int) {
	copy(ra.keys[i:], ra.keys[i+1:])
	copy(ra.containers[i:], ra.containers[i+1:])
	copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
	ra.resize(len(ra.keys) - 1)
}

func (ra *roaringArray) setContainerAtIndex(i int, c container) {
	ra.containers[i] = c
}

func (ra *roaringArray) replaceKeyAndContainerAtIndex(i int, key uint16, c container, mustCopyOnWrite bool) {
	ra.keys[i] = key
	ra.containers[i] = c
	ra.needCopyOnWrite[i] = mustCopyOnWrite
}

func (ra *roaringArray) size() int {
	return len(ra.keys)
}

// binarySearch looks for ikey in keys[begin:end). It returns the index if
// found, and -(insertionPoint+1) otherwise; it switches to a linear scan
// once the remaining range is small.
func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
	low := begin
	high := end - 1
	for low+16 <= high {
		middleIndex := low + (high-low)/2 // avoid overflow
		middleValue := ra.keys[middleIndex]
		if middleValue < ikey {
			low = middleIndex + 1
		} else if middleValue > ikey {
			high = middleIndex - 1
		} else {
			return int(middleIndex)
		}
	}
	// final linear scan over the small remaining window
	for ; low <= high; low++ {
		val := ra.keys[low]
		if val >= ikey {
			if val == ikey {
				return int(low)
			}
			break
		}
	}
	return -int(low + 1)
}

// equals reports whether o is a roaringArray with the same keys and
// logically-equal containers.
func (ra *roaringArray) equals(o interface{}) bool {
	srb, ok := o.(roaringArray)
	if ok {
		if
srb.size() != ra.size() {
			return false
		}
		for i, k := range ra.keys {
			if k != srb.keys[i] {
				return false
			}
		}
		for i, c := range ra.containers {
			if !c.equals(srb.containers[i]) {
				return false
			}
		}
		return true
	}
	return false
}

// headerSize returns the size in bytes of the serialized header (cookie,
// optional is-run bitmap, descriptive header and offsets), per the format
// spec referenced below.
func (ra *roaringArray) headerSize() uint64 {
	size := uint64(len(ra.keys))
	if ra.hasRunCompression() {
		if size < noOffsetThreshold { // for small bitmaps, we omit the offsets
			return 4 + (size+7)/8 + 4*size
		}
		return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
	}
	return 4 + 4 + 8*size
}

// serializedSizeInBytes returns the exact number of bytes writeTo would emit.
// should be dirt cheap
func (ra *roaringArray) serializedSizeInBytes() uint64 {
	answer := ra.headerSize()
	for _, c := range ra.containers {
		answer += uint64(c.serializedSizeInBytes())
	}
	return answer
}

//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//

// writeTo serializes ra to w in the portable roaring format and returns
// the number of bytes written.
func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
	hasRun := ra.hasRunCompression()
	isRunSizeInBytes := 0
	cookieSize := 8
	if hasRun {
		cookieSize = 4
		isRunSizeInBytes = (len(ra.keys) + 7) / 8
	}
	descriptiveHeaderSize := 4 * len(ra.keys)
	preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize

	buf := make([]byte, preambleSize+4*len(ra.keys))

	nw := 0

	if hasRun {
		binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie))
		nw += 2
		binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
		nw += 2

		// compute isRun bitmap
		var ir []byte
		isRun := newBitmapContainer()
		for i, c := range ra.containers {
			switch c.(type) {
			case *runContainer16:
				isRun.iadd(uint16(i))
			}
		}
		// convert to little endian
		ir = isRun.asLittleEndianByteSlice()[:isRunSizeInBytes]
		nw += copy(buf[nw:], ir)
	} else {
		binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
		nw += 4
		binary.LittleEndian.PutUint32(buf[4:], uint32(len(ra.keys)))
		nw += 4
	}

	// descriptive header: (key, cardinality-1) pairs
	for i, key := range ra.keys {
		binary.LittleEndian.PutUint16(buf[nw:], key)
		nw += 2
		c := ra.containers[i]
		binary.LittleEndian.PutUint16(buf[nw:], uint16(c.getCardinality()-1))
		nw += 2
	}

	startOffset := int64(preambleSize + 4*len(ra.keys))
	if !hasRun || (len(ra.keys) >= noOffsetThreshold) {
		// offset header
		for _, c := range ra.containers {
			binary.LittleEndian.PutUint32(buf[nw:], uint32(startOffset))
			nw += 4
			switch rc := c.(type) {
			case *runContainer16:
				startOffset += 2 + int64(len(rc.iv))*4
			default:
				startOffset += int64(getSizeInBytesFromCardinality(c.getCardinality()))
			}
		}
	}

	written, err := w.Write(buf[:nw])
	if err != nil {
		return n, err
	}
	n += int64(written)

	for _, c := range ra.containers {
		written, err := c.writeTo(w)
		if err != nil {
			return n, err
		}
		n += int64(written)
	}
	return n, nil
}

//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//

// toBytes returns the portable serialization of ra as a byte slice.
func (ra *roaringArray) toBytes() ([]byte, error) {
	var buf bytes.Buffer
	_, err := ra.writeTo(&buf)
	return buf.Bytes(), err
}

// readFrom deserializes ra from stream (portable roaring format) and
// returns the number of bytes consumed. Containers alias the input buffer,
// so every slot is flagged needCopyOnWrite.
func (ra *roaringArray) readFrom(stream byteInput) (int64, error) {
	cookie, err := stream.readUInt32()
	if err != nil {
		return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
	}

	var size uint32
	var isRunBitmap []byte

	if cookie&0x0000FFFF == serialCookie {
		size = uint32(uint16(cookie>>16) + 1)
		// create is-run-container bitmap
		isRunBitmapSize := (int(size) + 7) / 8
		isRunBitmap, err = stream.next(isRunBitmapSize)
		if err != nil {
			return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
		}
	} else if cookie == serialCookieNoRunContainer {
		size, err = stream.readUInt32()
		if err != nil {
			return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
		}
	} else {
		return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
	}

	if size > (1 << 16) {
		return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
	}

	// descriptive header
	buf, err := stream.next(2 * 2 * int(size))
	if err != nil {
		return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
	}

	keycard := byteSliceAsUint16Slice(buf)

	if isRunBitmap == nil || size >= noOffsetThreshold {
		// skip the offset header: we read the containers sequentially anyway
		if err := stream.skipBytes(int(size) * 4); err != nil {
			return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
		}
	}

	// Allocate slices upfront as number of containers is known
	if cap(ra.containers) >= int(size) {
		ra.containers = ra.containers[:size]
	} else {
		ra.containers = make([]container, size)
	}

	if cap(ra.keys) >= int(size) {
		ra.keys = ra.keys[:size]
	} else {
		ra.keys = make([]uint16, size)
	}

	if cap(ra.needCopyOnWrite) >= int(size) {
		ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
	} else {
		ra.needCopyOnWrite = make([]bool, size)
	}

	for i := uint32(0); i < size; i++ {
		key := keycard[2*i]
		card := int(keycard[2*i+1]) + 1
		ra.keys[i] = key
		ra.needCopyOnWrite[i] = true

		if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
			// run container
			nr, err := stream.readUInt16()
			if err != nil {
				// report the consumed byte count like every other error
				// path here (previously this returned 0).
				return stream.getReadBytes(), fmt.Errorf("failed to read runtime container size: %s", err)
			}

			buf, err := stream.next(int(nr) * 4)
			if err != nil {
				return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
			}

			nb := runContainer16{
				iv:   byteSliceAsInterval16Slice(buf),
				card: int64(card),
			}

			ra.containers[i] = &nb
		} else if card > arrayDefaultMaxSize {
			// bitmap container
			buf, err := stream.next(arrayDefaultMaxSize * 2)
			if err != nil {
				return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
			}

			nb := bitmapContainer{
				cardinality: card,
				bitmap:      byteSliceAsUint64Slice(buf),
			}

			ra.containers[i] = &nb
		} else {
			// array container
			buf, err := stream.next(card * 2)
			if err != nil {
				return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err)
			}

			nb := arrayContainer{
				byteSliceAsUint16Slice(buf),
			}

			ra.containers[i] = &nb
		}
	}

	return stream.getReadBytes(), nil
}

// hasRunCompression reports whether any container is run-encoded.
func (ra *roaringArray) hasRunCompression() bool {
	for _, c := range ra.containers {
		switch c.(type) {
		case *runContainer16:
			return true
		}
	}
return false
}

// writeToMsgpack serializes ra to stream as snappy-compressed msgpack.
// Containers are first wrapped into ra.conserz (containerSerz values)
// because the container interface cannot be serialized directly; conserz
// is cleared again before returning.
func (ra *roaringArray) writeToMsgpack(stream io.Writer) error {
	ra.conserz = make([]containerSerz, len(ra.containers))
	for i, v := range ra.containers {
		switch cn := v.(type) {
		case *bitmapContainer:
			bts, err := cn.MarshalMsg(nil)
			if err != nil {
				return err
			}
			ra.conserz[i].t = bitmapContype
			ra.conserz[i].r = bts
		case *arrayContainer:
			bts, err := cn.MarshalMsg(nil)
			if err != nil {
				return err
			}
			ra.conserz[i].t = arrayContype
			ra.conserz[i].r = bts
		case *runContainer16:
			bts, err := cn.MarshalMsg(nil)
			if err != nil {
				return err
			}
			ra.conserz[i].t = run16Contype
			ra.conserz[i].r = bts
		default:
			panic(fmt.Errorf("Unrecognized container implementation: %T", cn))
		}
	}
	w := snappy.NewWriter(stream)
	err := msgp.Encode(w, ra)
	ra.conserz = nil
	return err
}

// readFromMsgpack is the inverse of writeToMsgpack: it decodes the
// snappy-compressed msgpack stream and rebuilds ra.containers from the
// typed containerSerz wrappers in ra.conserz.
func (ra *roaringArray) readFromMsgpack(stream io.Reader) error {
	r := snappy.NewReader(stream)
	err := msgp.Decode(r, ra)
	if err != nil {
		return err
	}

	if len(ra.containers) != len(ra.keys) {
		ra.containers = make([]container, len(ra.keys))
	}

	for i, v := range ra.conserz {
		switch v.t {
		case bitmapContype:
			c := &bitmapContainer{}
			_, err = c.UnmarshalMsg(v.r)
			if err != nil {
				return err
			}
			ra.containers[i] = c
		case arrayContype:
			c := &arrayContainer{}
			_, err = c.UnmarshalMsg(v.r)
			if err != nil {
				return err
			}
			ra.containers[i] = c
		case run16Contype:
			c := &runContainer16{}
			_, err = c.UnmarshalMsg(v.r)
			if err != nil {
				return err
			}
			ra.containers[i] = c
		default:
			return fmt.Errorf("unrecognized contype serialization code: '%v'", v.t)
		}
	}
	ra.conserz = nil
	return nil
}

// advanceUntil returns the smallest index strictly greater than pos such
// that keys[index] >= min, using a doubling (exponential) probe followed
// by a binary search; it returns len(ra.keys) if no such index exists.
func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
	lower := pos + 1

	if lower >= len(ra.keys) || ra.keys[lower] >= min {
		return lower
	}

	spansize := 1

	// double the span until it overshoots min (or the end of the array)
	for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
		spansize *= 2
	}
	var upper int
	if lower+spansize < len(ra.keys) {
		upper = lower + spansize
	} else {
		upper = len(ra.keys) - 1
	}

	if ra.keys[upper] == min {
		return upper
	}

	if ra.keys[upper] < min {
		// means
		// array
		// has no
		// item
		// >=
min // pos = array.length; return len(ra.keys) } // we know that the next-smallest span was too small lower += (spansize >> 1) mid := 0 for lower+1 != upper { mid = (lower + upper) >> 1 if ra.keys[mid] == min { return mid } else if ra.keys[mid] < min { lower = mid } else { upper = mid } } return upper } func (ra *roaringArray) markAllAsNeedingCopyOnWrite() { for i := range ra.needCopyOnWrite { ra.needCopyOnWrite[i] = true } } func (ra *roaringArray) needsCopyOnWrite(i int) bool { return ra.needCopyOnWrite[i] } func (ra *roaringArray) setNeedsCopyOnWrite(i int) { ra.needCopyOnWrite[i] = true } roaring-0.4.21/roaringarray_gen.go 0000664 0000000 0000000 00000026165 13542657257 0017151 0 ustar 00root root 0000000 0000000 package roaring // NOTE: THIS FILE WAS PRODUCED BY THE // MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) // DO NOT EDIT import ( "github.com/tinylib/msgp/msgp" ) // Deprecated: DecodeMsg implements msgp.Decodable func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field var zxvk uint32 zxvk, err = dc.ReadMapHeader() if err != nil { return } for zxvk > 0 { zxvk-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "t": { var zbzg uint8 zbzg, err = dc.ReadUint8() z.t = contype(zbzg) } if err != nil { return } case "r": err = z.r.DecodeMsg(dc) if err != nil { return } default: err = dc.Skip() if err != nil { return } } } return } // Deprecated: EncodeMsg implements msgp.Encodable func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 2 // write "t" err = en.Append(0x82, 0xa1, 0x74) if err != nil { return err } err = en.WriteUint8(uint8(z.t)) if err != nil { return } // write "r" err = en.Append(0xa1, 0x72) if err != nil { return err } err = z.r.EncodeMsg(en) if err != nil { return } return } // Deprecated: MarshalMsg implements msgp.Marshaler func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) 
// map header, size 2 // string "t" o = append(o, 0x82, 0xa1, 0x74) o = msgp.AppendUint8(o, uint8(z.t)) // string "r" o = append(o, 0xa1, 0x72) o, err = z.r.MarshalMsg(o) if err != nil { return } return } // Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field var zbai uint32 zbai, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zbai > 0 { zbai-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "t": { var zcmr uint8 zcmr, bts, err = msgp.ReadUint8Bytes(bts) z.t = contype(zcmr) } if err != nil { return } case "r": bts, err = z.r.UnmarshalMsg(bts) if err != nil { return } default: bts, err = msgp.Skip(bts) if err != nil { return } } } o = bts return } // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *containerSerz) Msgsize() (s int) { s = 1 + 2 + msgp.Uint8Size + 2 + z.r.Msgsize() return } // Deprecated: DecodeMsg implements msgp.Decodable func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) { { var zajw uint8 zajw, err = dc.ReadUint8() (*z) = contype(zajw) } if err != nil { return } return } // Deprecated: EncodeMsg implements msgp.Encodable func (z contype) EncodeMsg(en *msgp.Writer) (err error) { err = en.WriteUint8(uint8(z)) if err != nil { return } return } // Deprecated: MarshalMsg implements msgp.Marshaler func (z contype) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) o = msgp.AppendUint8(o, uint8(z)) return } // Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) { { var zwht uint8 zwht, bts, err = msgp.ReadUint8Bytes(bts) (*z) = contype(zwht) } if err != nil { return } o = bts return } // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z contype) 
Msgsize() (s int) { s = msgp.Uint8Size return } // Deprecated: DecodeMsg implements msgp.Decodable func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field var zlqf uint32 zlqf, err = dc.ReadMapHeader() if err != nil { return } for zlqf > 0 { zlqf-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "keys": var zdaf uint32 zdaf, err = dc.ReadArrayHeader() if err != nil { return } if cap(z.keys) >= int(zdaf) { z.keys = (z.keys)[:zdaf] } else { z.keys = make([]uint16, zdaf) } for zhct := range z.keys { z.keys[zhct], err = dc.ReadUint16() if err != nil { return } } case "needCopyOnWrite": var zpks uint32 zpks, err = dc.ReadArrayHeader() if err != nil { return } if cap(z.needCopyOnWrite) >= int(zpks) { z.needCopyOnWrite = (z.needCopyOnWrite)[:zpks] } else { z.needCopyOnWrite = make([]bool, zpks) } for zcua := range z.needCopyOnWrite { z.needCopyOnWrite[zcua], err = dc.ReadBool() if err != nil { return } } case "copyOnWrite": z.copyOnWrite, err = dc.ReadBool() if err != nil { return } case "conserz": var zjfb uint32 zjfb, err = dc.ReadArrayHeader() if err != nil { return } if cap(z.conserz) >= int(zjfb) { z.conserz = (z.conserz)[:zjfb] } else { z.conserz = make([]containerSerz, zjfb) } for zxhx := range z.conserz { var zcxo uint32 zcxo, err = dc.ReadMapHeader() if err != nil { return } for zcxo > 0 { zcxo-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "t": { var zeff uint8 zeff, err = dc.ReadUint8() z.conserz[zxhx].t = contype(zeff) } if err != nil { return } case "r": err = z.conserz[zxhx].r.DecodeMsg(dc) if err != nil { return } default: err = dc.Skip() if err != nil { return } } } } default: err = dc.Skip() if err != nil { return } } } return } // Deprecated: EncodeMsg implements msgp.Encodable func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 4 // write "keys" err = en.Append(0x84, 0xa4, 0x6b, 0x65, 0x79, 
0x73) if err != nil { return err } err = en.WriteArrayHeader(uint32(len(z.keys))) if err != nil { return } for zhct := range z.keys { err = en.WriteUint16(z.keys[zhct]) if err != nil { return } } // write "needCopyOnWrite" err = en.Append(0xaf, 0x6e, 0x65, 0x65, 0x64, 0x43, 0x6f, 0x70, 0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65) if err != nil { return err } err = en.WriteArrayHeader(uint32(len(z.needCopyOnWrite))) if err != nil { return } for zcua := range z.needCopyOnWrite { err = en.WriteBool(z.needCopyOnWrite[zcua]) if err != nil { return } } // write "copyOnWrite" err = en.Append(0xab, 0x63, 0x6f, 0x70, 0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65) if err != nil { return err } err = en.WriteBool(z.copyOnWrite) if err != nil { return } // write "conserz" err = en.Append(0xa7, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x72, 0x7a) if err != nil { return err } err = en.WriteArrayHeader(uint32(len(z.conserz))) if err != nil { return } for zxhx := range z.conserz { // map header, size 2 // write "t" err = en.Append(0x82, 0xa1, 0x74) if err != nil { return err } err = en.WriteUint8(uint8(z.conserz[zxhx].t)) if err != nil { return } // write "r" err = en.Append(0xa1, 0x72) if err != nil { return err } err = z.conserz[zxhx].r.EncodeMsg(en) if err != nil { return } } return } // Deprecated: MarshalMsg implements msgp.Marshaler func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 4 // string "keys" o = append(o, 0x84, 0xa4, 0x6b, 0x65, 0x79, 0x73) o = msgp.AppendArrayHeader(o, uint32(len(z.keys))) for zhct := range z.keys { o = msgp.AppendUint16(o, z.keys[zhct]) } // string "needCopyOnWrite" o = append(o, 0xaf, 0x6e, 0x65, 0x65, 0x64, 0x43, 0x6f, 0x70, 0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65) o = msgp.AppendArrayHeader(o, uint32(len(z.needCopyOnWrite))) for zcua := range z.needCopyOnWrite { o = msgp.AppendBool(o, z.needCopyOnWrite[zcua]) } // string "copyOnWrite" o = append(o, 0xab, 0x63, 0x6f, 0x70, 
0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65) o = msgp.AppendBool(o, z.copyOnWrite) // string "conserz" o = append(o, 0xa7, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x72, 0x7a) o = msgp.AppendArrayHeader(o, uint32(len(z.conserz))) for zxhx := range z.conserz { // map header, size 2 // string "t" o = append(o, 0x82, 0xa1, 0x74) o = msgp.AppendUint8(o, uint8(z.conserz[zxhx].t)) // string "r" o = append(o, 0xa1, 0x72) o, err = z.conserz[zxhx].r.MarshalMsg(o) if err != nil { return } } return } // Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field var zrsw uint32 zrsw, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zrsw > 0 { zrsw-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "keys": var zxpk uint32 zxpk, bts, err = msgp.ReadArrayHeaderBytes(bts) if err != nil { return } if cap(z.keys) >= int(zxpk) { z.keys = (z.keys)[:zxpk] } else { z.keys = make([]uint16, zxpk) } for zhct := range z.keys { z.keys[zhct], bts, err = msgp.ReadUint16Bytes(bts) if err != nil { return } } case "needCopyOnWrite": var zdnj uint32 zdnj, bts, err = msgp.ReadArrayHeaderBytes(bts) if err != nil { return } if cap(z.needCopyOnWrite) >= int(zdnj) { z.needCopyOnWrite = (z.needCopyOnWrite)[:zdnj] } else { z.needCopyOnWrite = make([]bool, zdnj) } for zcua := range z.needCopyOnWrite { z.needCopyOnWrite[zcua], bts, err = msgp.ReadBoolBytes(bts) if err != nil { return } } case "copyOnWrite": z.copyOnWrite, bts, err = msgp.ReadBoolBytes(bts) if err != nil { return } case "conserz": var zobc uint32 zobc, bts, err = msgp.ReadArrayHeaderBytes(bts) if err != nil { return } if cap(z.conserz) >= int(zobc) { z.conserz = (z.conserz)[:zobc] } else { z.conserz = make([]containerSerz, zobc) } for zxhx := range z.conserz { var zsnv uint32 zsnv, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zsnv > 0 { zsnv-- field, bts, err 
= msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "t": { var zkgt uint8 zkgt, bts, err = msgp.ReadUint8Bytes(bts) z.conserz[zxhx].t = contype(zkgt) } if err != nil { return } case "r": bts, err = z.conserz[zxhx].r.UnmarshalMsg(bts) if err != nil { return } default: bts, err = msgp.Skip(bts) if err != nil { return } } } } default: bts, err = msgp.Skip(bts) if err != nil { return } } } o = bts return } // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *roaringArray) Msgsize() (s int) { s = 1 + 5 + msgp.ArrayHeaderSize + (len(z.keys) * (msgp.Uint16Size)) + 16 + msgp.ArrayHeaderSize + (len(z.needCopyOnWrite) * (msgp.BoolSize)) + 12 + msgp.BoolSize + 8 + msgp.ArrayHeaderSize for zxhx := range z.conserz { s += 1 + 2 + msgp.Uint8Size + 2 + z.conserz[zxhx].r.Msgsize() } return } roaring-0.4.21/roaringarray_gen_test.go 0000664 0000000 0000000 00000011016 13542657257 0020175 0 ustar 00root root 0000000 0000000 package roaring // NOTE: THIS FILE WAS PRODUCED BY THE // MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) // DO NOT EDIT import ( "bytes" "testing" "github.com/tinylib/msgp/msgp" ) func TestMarshalUnmarshalcontainerSerz(t *testing.T) { v := containerSerz{} bts, err := v.MarshalMsg(nil) if err != nil { t.Fatal(err) } left, err := v.UnmarshalMsg(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) } left, err = msgp.Skip(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after Skip(): %q", len(left), left) } } func BenchmarkMarshalMsgcontainerSerz(b *testing.B) { v := containerSerz{} b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.MarshalMsg(nil) } } func BenchmarkAppendMsgcontainerSerz(b *testing.B) { v := containerSerz{} bts := make([]byte, 0, v.Msgsize()) bts, _ = v.MarshalMsg(bts[0:0]) b.SetBytes(int64(len(bts))) b.ReportAllocs() 
b.ResetTimer() for i := 0; i < b.N; i++ { bts, _ = v.MarshalMsg(bts[0:0]) } } func BenchmarkUnmarshalcontainerSerz(b *testing.B) { v := containerSerz{} bts, _ := v.MarshalMsg(nil) b.ReportAllocs() b.SetBytes(int64(len(bts))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := v.UnmarshalMsg(bts) if err != nil { b.Fatal(err) } } } func TestEncodeDecodecontainerSerz(t *testing.T) { v := containerSerz{} var buf bytes.Buffer msgp.Encode(&buf, &v) m := v.Msgsize() if buf.Len() > m { t.Logf("WARNING: Msgsize() for %v is inaccurate", v) } vn := containerSerz{} err := msgp.Decode(&buf, &vn) if err != nil { t.Error(err) } buf.Reset() msgp.Encode(&buf, &v) err = msgp.NewReader(&buf).Skip() if err != nil { t.Error(err) } } func BenchmarkEncodecontainerSerz(b *testing.B) { v := containerSerz{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) en := msgp.NewWriter(msgp.Nowhere) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.EncodeMsg(en) } en.Flush() } func BenchmarkDecodecontainerSerz(b *testing.B) { v := containerSerz{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) rd := msgp.NewEndlessReader(buf.Bytes(), b) dc := msgp.NewReader(rd) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { err := v.DecodeMsg(dc) if err != nil { b.Fatal(err) } } } func TestMarshalUnmarshalroaringArray(t *testing.T) { v := roaringArray{} bts, err := v.MarshalMsg(nil) if err != nil { t.Fatal(err) } left, err := v.UnmarshalMsg(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) } left, err = msgp.Skip(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after Skip(): %q", len(left), left) } } func BenchmarkMarshalMsgroaringArray(b *testing.B) { v := roaringArray{} b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.MarshalMsg(nil) } } func BenchmarkAppendMsgroaringArray(b *testing.B) { v := roaringArray{} bts := 
make([]byte, 0, v.Msgsize()) bts, _ = v.MarshalMsg(bts[0:0]) b.SetBytes(int64(len(bts))) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { bts, _ = v.MarshalMsg(bts[0:0]) } } func BenchmarkUnmarshalroaringArray(b *testing.B) { v := roaringArray{} bts, _ := v.MarshalMsg(nil) b.ReportAllocs() b.SetBytes(int64(len(bts))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := v.UnmarshalMsg(bts) if err != nil { b.Fatal(err) } } } func TestEncodeDecoderoaringArray(t *testing.T) { v := roaringArray{} var buf bytes.Buffer msgp.Encode(&buf, &v) m := v.Msgsize() if buf.Len() > m { t.Logf("WARNING: Msgsize() for %v is inaccurate", v) } vn := roaringArray{} err := msgp.Decode(&buf, &vn) if err != nil { t.Error(err) } buf.Reset() msgp.Encode(&buf, &v) err = msgp.NewReader(&buf).Skip() if err != nil { t.Error(err) } } func BenchmarkEncoderoaringArray(b *testing.B) { v := roaringArray{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) en := msgp.NewWriter(msgp.Nowhere) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.EncodeMsg(en) } en.Flush() } func BenchmarkDecoderoaringArray(b *testing.B) { v := roaringArray{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) rd := msgp.NewEndlessReader(buf.Bytes(), b) dc := msgp.NewReader(rd) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { err := v.DecodeMsg(dc) if err != nil { b.Fatal(err) } } } roaring-0.4.21/roaringcow_test.go 0000664 0000000 0000000 00000121573 13542657257 0017030 0 ustar 00root root 0000000 0000000 package roaring import ( "bytes" "log" "math/rand" "strconv" "testing" "unsafe" "github.com/stretchr/testify/assert" "github.com/willf/bitset" ) func TestCloneOfCOW(t *testing.T) { rb1 := NewBitmap() rb1.SetCopyOnWrite(true) rb1.Add(10) rb1.Add(12) rb1.Remove(12) rb2 := rb1.Clone() rb3 := rb1.Clone() rb2.Remove(10) rb3.AddRange(100, 200) assert.NotEmpty(t, rb2.IsEmpty()) assert.EqualValues(t, 100+1, rb3.GetCardinality()) assert.True(t, rb1.Contains(10)) 
assert.EqualValues(t, 1, rb1.GetCardinality()) } func TestRoaringBitmapBitmapOfCOW(t *testing.T) { array := []uint32{5580, 33722, 44031, 57276, 83097} bmp := BitmapOf(array...) bmp.SetCopyOnWrite(true) assert.EqualValues(t, len(array), bmp.GetCardinality()) } func TestRoaringBitmapAddCOW(t *testing.T) { array := []uint32{5580, 33722, 44031, 57276, 83097} bmp := New() bmp.SetCopyOnWrite(true) for _, v := range array { bmp.Add(v) } assert.EqualValues(t, len(array), bmp.GetCardinality()) } func TestRoaringBitmapAddManyCOW(t *testing.T) { array := []uint32{5580, 33722, 44031, 57276, 83097} bmp := NewBitmap() bmp.SetCopyOnWrite(true) bmp.AddMany(array) assert.EqualValues(t, len(array), bmp.GetCardinality()) } // https://github.com/RoaringBitmap/roaring/issues/64 func TestFlip64COW(t *testing.T) { bm := New() bm.SetCopyOnWrite(true) bm.AddInt(0) bm.Flip(1, 2) i := bm.Iterator() assert.False(t, i.Next() != 0 || i.Next() != 1 || i.HasNext()) } // https://github.com/RoaringBitmap/roaring/issues/64 func TestFlip64OffCOW(t *testing.T) { bm := New() bm.SetCopyOnWrite(true) bm.AddInt(10) bm.Flip(11, 12) i := bm.Iterator() assert.False(t, i.Next() != 10 || i.Next() != 11 || i.HasNext()) } func TestStringerCOW(t *testing.T) { v := NewBitmap() v.SetCopyOnWrite(true) for i := uint32(0); i < 10; i++ { v.Add(i) } assert.Equal(t, "{0,1,2,3,4,5,6,7,8,9}", v.String()) v.RunOptimize() assert.Equal(t, "{0,1,2,3,4,5,6,7,8,9}", v.String()) } func TestFastCardCOW(t *testing.T) { bm := NewBitmap() bm.SetCopyOnWrite(true) bm.Add(1) bm.AddRange(21, 260000) bm2 := NewBitmap() bm2.SetCopyOnWrite(true) bm2.Add(25) assert.EqualValues(t, 1, bm2.AndCardinality(bm)) assert.Equal(t, bm.GetCardinality(), bm2.OrCardinality(bm)) assert.EqualValues(t, 1, bm.AndCardinality(bm2)) assert.Equal(t, bm.GetCardinality(), bm.OrCardinality(bm2)) assert.EqualValues(t, 1, bm2.AndCardinality(bm)) assert.Equal(t, bm.GetCardinality(), bm2.OrCardinality(bm)) bm.RunOptimize() assert.EqualValues(t, 1, 
bm2.AndCardinality(bm)) assert.Equal(t, bm.GetCardinality(), bm2.OrCardinality(bm)) assert.EqualValues(t, 1, bm.AndCardinality(bm2)) assert.Equal(t, bm.GetCardinality(), bm.OrCardinality(bm2)) assert.EqualValues(t, 1, bm2.AndCardinality(bm)) assert.Equal(t, bm.GetCardinality(), bm2.OrCardinality(bm)) } func TestIntersects1COW(t *testing.T) { bm := NewBitmap() bm.SetCopyOnWrite(true) bm.Add(1) bm.AddRange(21, 26) bm2 := NewBitmap() bm2.SetCopyOnWrite(true) bm2.Add(25) assert.True(t, bm2.Intersects(bm)) bm.Remove(25) assert.False(t, bm2.Intersects(bm)) bm.AddRange(1, 100000) assert.True(t, bm2.Intersects(bm)) } func TestRangePanicCOW(t *testing.T) { bm := NewBitmap() bm.SetCopyOnWrite(true) bm.Add(1) bm.AddRange(21, 26) bm.AddRange(9, 14) bm.AddRange(11, 16) } func TestRangeRemovalCOW(t *testing.T) { bm := NewBitmap() bm.SetCopyOnWrite(true) bm.Add(1) bm.AddRange(21, 26) bm.AddRange(9, 14) bm.RemoveRange(11, 16) bm.RemoveRange(1, 26) assert.EqualValues(t, 0, bm.GetCardinality()) bm.AddRange(1, 10000) assert.EqualValues(t, 10000-1, bm.GetCardinality()) bm.RemoveRange(1, 10000) assert.EqualValues(t, 0, bm.GetCardinality()) } func TestRangeRemovalFromContentCOW(t *testing.T) { bm := NewBitmap() bm.SetCopyOnWrite(true) for i := 100; i < 10000; i++ { bm.AddInt(i * 3) } bm.AddRange(21, 26) bm.AddRange(9, 14) bm.RemoveRange(11, 16) bm.RemoveRange(0, 30000) assert.EqualValues(t, 0, bm.GetCardinality()) } func TestFlipOnEmptyCOW(t *testing.T) { t.Run("TestFlipOnEmpty in-place", func(t *testing.T) { bm := NewBitmap() bm.SetCopyOnWrite(true) bm.Flip(0, 10) c := bm.GetCardinality() assert.EqualValues(t, 10, c) }) t.Run("TestFlipOnEmpty, generating new result", func(t *testing.T) { bm := NewBitmap() bm.SetCopyOnWrite(true) bm = Flip(bm, 0, 10) c := bm.GetCardinality() assert.EqualValues(t, 10, c) }) } func TestBitmapRankCOW(t *testing.T) { for N := uint32(1); N <= 1048576; N *= 2 { t.Run("rank tests"+strconv.Itoa(int(N)), func(t *testing.T) { for gap := uint32(1); gap <= 65536; 
gap *= 2 { rb1 := NewBitmap() rb1.SetCopyOnWrite(true) for x := uint32(0); x <= N; x += gap { rb1.Add(x) } for y := uint32(0); y <= N; y++ { if rb1.Rank(y) != uint64((y+1+gap-1)/gap) { assert.Equal(t, (y+1+gap-1)/gap, rb1.Rank(y)) } } } }) } } func TestBitmapSelectCOW(t *testing.T) { for N := uint32(1); N <= 1048576; N *= 2 { t.Run("rank tests"+strconv.Itoa(int(N)), func(t *testing.T) { for gap := uint32(1); gap <= 65536; gap *= 2 { rb1 := NewBitmap() rb1.SetCopyOnWrite(true) for x := uint32(0); x <= N; x += gap { rb1.Add(x) } for y := uint32(0); y <= N/gap; y++ { expectedInt := y * gap i, err := rb1.Select(y) if err != nil { t.Fatal(err) } if i != expectedInt { assert.Equal(t, expectedInt, i) } } } }) } } // some extra tests func TestBitmapExtraCOW(t *testing.T) { for N := uint32(1); N <= 65536; N *= 2 { t.Run("extra tests"+strconv.Itoa(int(N)), func(t *testing.T) { for gap := uint32(1); gap <= 65536; gap *= 2 { bs1 := bitset.New(0) rb1 := NewBitmap() rb1.SetCopyOnWrite(true) for x := uint32(0); x <= N; x += gap { bs1.Set(uint(x)) rb1.Add(x) } assert.EqualValues(t, rb1.GetCardinality(), bs1.Count()) assert.True(t, equalsBitSet(bs1, rb1)) for offset := uint32(1); offset <= gap; offset *= 2 { bs2 := bitset.New(0) rb2 := NewBitmap() rb2.SetCopyOnWrite(true) for x := uint32(0); x <= N; x += gap { bs2.Set(uint(x + offset)) rb2.Add(x + offset) } assert.EqualValues(t, rb2.GetCardinality(), bs2.Count()) assert.True(t, equalsBitSet(bs2, rb2)) clonebs1 := bs1.Clone() clonebs1.InPlaceIntersection(bs2) if !equalsBitSet(clonebs1, And(rb1, rb2)) { v := rb1.Clone() v.And(rb2) assert.True(t, equalsBitSet(clonebs1, v)) } // testing OR clonebs1 = bs1.Clone() clonebs1.InPlaceUnion(bs2) assert.True(t, equalsBitSet(clonebs1, Or(rb1, rb2))) // testing XOR clonebs1 = bs1.Clone() clonebs1.InPlaceSymmetricDifference(bs2) assert.True(t, equalsBitSet(clonebs1, Xor(rb1, rb2))) //testing NOTAND clonebs1 = bs1.Clone() clonebs1.InPlaceDifference(bs2) assert.True(t, equalsBitSet(clonebs1, 
AndNot(rb1, rb2))) } } }) } } func TestBitmapCOW(t *testing.T) { t.Run("Test Contains", func(t *testing.T) { rbm1 := NewBitmap() rbm1.SetCopyOnWrite(true) for k := 0; k < 1000; k++ { rbm1.AddInt(17 * k) } for k := 0; k < 17*1000; k++ { assert.Equal(t, k/17*17 == k, rbm1.ContainsInt(k)) } }) t.Run("Test Clone", func(t *testing.T) { rb1 := NewBitmap() rb1.SetCopyOnWrite(true) rb1.Add(10) rb2 := rb1.Clone() rb2.Remove(10) assert.True(t, rb1.Contains(10)) }) t.Run("Test ANDNOT4", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb2 := NewBitmap() rb2.SetCopyOnWrite(true) for i := 0; i < 200000; i += 4 { rb2.AddInt(i) } for i := 200000; i < 400000; i += 14 { rb2.AddInt(i) } off := AndNot(rb2, rb) andNotresult := AndNot(rb, rb2) assert.True(t, rb.Equals(andNotresult)) assert.True(t, rb2.Equals(off)) rb2.AndNot(rb) assert.True(t, rb2.Equals(off)) }) t.Run("Test AND", func(t *testing.T) { rr := NewBitmap() rr.SetCopyOnWrite(true) for k := 0; k < 4000; k++ { rr.AddInt(k) } rr.Add(100000) rr.Add(110000) rr2 := NewBitmap() rr2.SetCopyOnWrite(true) rr2.Add(13) rrand := And(rr, rr2) array := rrand.ToArray() assert.Equal(t, 1, len(array)) assert.EqualValues(t, 13, array[0]) rr.And(rr2) array = rr.ToArray() assert.Equal(t, 1, len(array)) assert.EqualValues(t, 13, array[0]) }) t.Run("Test AND 2", func(t *testing.T) { rr := NewBitmap() rr.SetCopyOnWrite(true) for k := 4000; k < 4256; k++ { rr.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr.AddInt(k) } for k := 3 * 65536; k < 3*65536+9000; k++ { rr.AddInt(k) } for k := 4 * 65535; k < 4*65535+7000; k++ { rr.AddInt(k) } for k := 6 * 65535; k < 6*65535+10000; k++ { rr.AddInt(k) } for k := 8 * 65535; k < 8*65535+1000; k++ { rr.AddInt(k) } for k := 9 * 65535; k < 9*65535+30000; k++ { rr.AddInt(k) } rr2 := NewBitmap() rr2.SetCopyOnWrite(true) for k := 4000; k < 4256; k++ { rr2.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr2.AddInt(k) } for k := 3*65536 + 2000; k < 3*65536+6000; k++ { rr2.AddInt(k) } for k := 6 * 
65535; k < 6*65535+1000; k++ { rr2.AddInt(k) } for k := 7 * 65535; k < 7*65535+1000; k++ { rr2.AddInt(k) } for k := 10 * 65535; k < 10*65535+5000; k++ { rr2.AddInt(k) } correct := And(rr, rr2) rr.And(rr2) assert.True(t, correct.Equals(rr)) }) t.Run("Test AND 2", func(t *testing.T) { rr := NewBitmap() rr.SetCopyOnWrite(true) for k := 0; k < 4000; k++ { rr.AddInt(k) } rr.AddInt(100000) rr.AddInt(110000) rr2 := NewBitmap() rr2.SetCopyOnWrite(true) rr2.AddInt(13) rrand := And(rr, rr2) array := rrand.ToArray() assert.Equal(t, 1, len(array)) assert.EqualValues(t, 13, array[0]) }) t.Run("Test AND 3a", func(t *testing.T) { rr := NewBitmap() rr.SetCopyOnWrite(true) rr2 := NewBitmap() rr2.SetCopyOnWrite(true) for k := 6 * 65536; k < 6*65536+10000; k++ { rr.AddInt(k) } for k := 6 * 65536; k < 6*65536+1000; k++ { rr2.AddInt(k) } result := And(rr, rr2) assert.EqualValues(t, 1000, result.GetCardinality()) }) t.Run("Test AND 3", func(t *testing.T) { var arrayand [11256]uint32 //393,216 pos := 0 rr := NewBitmap() rr.SetCopyOnWrite(true) for k := 4000; k < 4256; k++ { rr.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr.AddInt(k) } for k := 3 * 65536; k < 3*65536+1000; k++ { rr.AddInt(k) } for k := 3*65536 + 1000; k < 3*65536+7000; k++ { rr.AddInt(k) } for k := 3*65536 + 7000; k < 3*65536+9000; k++ { rr.AddInt(k) } for k := 4 * 65536; k < 4*65536+7000; k++ { rr.AddInt(k) } for k := 8 * 65536; k < 8*65536+1000; k++ { rr.AddInt(k) } for k := 9 * 65536; k < 9*65536+30000; k++ { rr.AddInt(k) } rr2 := NewBitmap() rr2.SetCopyOnWrite(true) for k := 4000; k < 4256; k++ { rr2.AddInt(k) arrayand[pos] = uint32(k) pos++ } for k := 65536; k < 65536+4000; k++ { rr2.AddInt(k) arrayand[pos] = uint32(k) pos++ } for k := 3*65536 + 1000; k < 3*65536+7000; k++ { rr2.AddInt(k) arrayand[pos] = uint32(k) pos++ } for k := 6 * 65536; k < 6*65536+10000; k++ { rr.AddInt(k) } for k := 6 * 65536; k < 6*65536+1000; k++ { rr2.AddInt(k) arrayand[pos] = uint32(k) pos++ } for k := 7 * 65536; k < 7*65536+1000; 
k++ { rr2.AddInt(k) } for k := 10 * 65536; k < 10*65536+5000; k++ { rr2.AddInt(k) } rrand := And(rr, rr2) arrayres := rrand.ToArray() ok := true for i := range arrayres { if i < len(arrayand) { if arrayres[i] != arrayand[i] { log.Println(i, arrayres[i], arrayand[i]) ok = false } } else { log.Println('x', arrayres[i]) ok = false } } assert.Equal(t, len(arrayres), len(arrayand)) assert.True(t, ok) }) t.Run("Test AND 4", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb2 := NewBitmap() rb2.SetCopyOnWrite(true) for i := 0; i < 200000; i += 4 { rb2.AddInt(i) } for i := 200000; i < 400000; i += 14 { rb2.AddInt(i) } //TODO: Bitmap.And(bm,bm2) andresult := And(rb, rb2) off := And(rb2, rb) assert.True(t, andresult.Equals(off)) assert.EqualValues(t, 0, andresult.GetCardinality()) for i := 500000; i < 600000; i += 14 { rb.AddInt(i) } for i := 200000; i < 400000; i += 3 { rb2.AddInt(i) } andresult2 := And(rb, rb2) assert.EqualValues(t, 0, andresult.GetCardinality()) assert.EqualValues(t, 0, andresult2.GetCardinality()) for i := 0; i < 200000; i += 4 { rb.AddInt(i) } for i := 200000; i < 400000; i += 14 { rb.AddInt(i) } assert.EqualValues(t, 0, andresult.GetCardinality()) rc := And(rb, rb2) rb.And(rb2) assert.Equal(t, rb.GetCardinality(), rc.GetCardinality()) }) t.Run("ArrayContainerCardinalityTest", func(t *testing.T) { ac := newArrayContainer() for k := uint16(0); k < 100; k++ { ac.iadd(k) assert.EqualValues(t, k+1, ac.getCardinality()) } for k := uint16(0); k < 100; k++ { ac.iadd(k) assert.EqualValues(t, 100, ac.getCardinality()) } }) t.Run("or test", func(t *testing.T) { rr := NewBitmap() rr.SetCopyOnWrite(true) for k := 0; k < 4000; k++ { rr.AddInt(k) } rr2 := NewBitmap() rr2.SetCopyOnWrite(true) for k := 4000; k < 8000; k++ { rr2.AddInt(k) } result := Or(rr, rr2) assert.Equal(t, rr.GetCardinality()+rr2.GetCardinality(), result.GetCardinality()) }) t.Run("basic test", func(t *testing.T) { rr := NewBitmap() rr.SetCopyOnWrite(true) var a [4002]uint32 pos := 0 
for k := 0; k < 4000; k++ { rr.AddInt(k) a[pos] = uint32(k) pos++ } rr.AddInt(100000) a[pos] = 100000 pos++ rr.AddInt(110000) a[pos] = 110000 pos++ array := rr.ToArray() ok := true for i := range a { if array[i] != a[i] { log.Println("rr : ", array[i], " a : ", a[i]) ok = false } } assert.Equal(t, len(a), len(array)) assert.True(t, ok) }) t.Run("BitmapContainerCardinalityTest", func(t *testing.T) { ac := newBitmapContainer() for k := uint16(0); k < 100; k++ { ac.iadd(k) assert.EqualValues(t, k+1, ac.getCardinality()) } for k := uint16(0); k < 100; k++ { ac.iadd(k) assert.EqualValues(t, 100, ac.getCardinality()) } }) t.Run("BitmapContainerTest", func(t *testing.T) { rr := newBitmapContainer() rr.iadd(uint16(110)) rr.iadd(uint16(114)) rr.iadd(uint16(115)) var array [3]uint16 pos := 0 for itr := rr.getShortIterator(); itr.hasNext(); { array[pos] = itr.next() pos++ } assert.EqualValues(t, 110, array[0]) assert.EqualValues(t, 114, array[1]) assert.EqualValues(t, 115, array[2]) }) t.Run("cardinality test", func(t *testing.T) { N := 1024 for gap := 7; gap < 100000; gap *= 10 { for offset := 2; offset <= 1024; offset *= 2 { rb := NewBitmap() rb.SetCopyOnWrite(true) for k := 0; k < N; k++ { rb.AddInt(k * gap) assert.EqualValues(t, k+1, rb.GetCardinality()) } assert.EqualValues(t, N, rb.GetCardinality()) // check the add of existing values for k := 0; k < N; k++ { rb.AddInt(k * gap) assert.EqualValues(t, N, rb.GetCardinality()) } rb2 := NewBitmap() rb2.SetCopyOnWrite(true) for k := 0; k < N; k++ { rb2.AddInt(k * gap * offset) assert.EqualValues(t, k+1, rb2.GetCardinality()) } assert.EqualValues(t, N, rb2.GetCardinality()) for k := 0; k < N; k++ { rb2.AddInt(k * gap * offset) assert.EqualValues(t, N, rb2.GetCardinality()) } assert.EqualValues(t, N/offset, And(rb, rb2).GetCardinality()) assert.EqualValues(t, 2*N-2*N/offset, Xor(rb, rb2).GetCardinality()) assert.EqualValues(t, 2*N-N/offset, Or(rb, rb2).GetCardinality()) } } }) t.Run("clear test", func(t *testing.T) { rb := 
NewBitmap() rb.SetCopyOnWrite(true) for i := 0; i < 200000; i += 7 { // dense rb.AddInt(i) } for i := 200000; i < 400000; i += 177 { // sparse rb.AddInt(i) } rb2 := NewBitmap() rb2.SetCopyOnWrite(true) rb3 := NewBitmap() rb3.SetCopyOnWrite(true) for i := 0; i < 200000; i += 4 { rb2.AddInt(i) } for i := 200000; i < 400000; i += 14 { rb2.AddInt(i) } rb.Clear() assert.EqualValues(t, 0, rb.GetCardinality()) assert.NotEqual(t, 0, rb2.GetCardinality()) rb.AddInt(4) rb3.AddInt(4) andresult := And(rb, rb2) orresult := Or(rb, rb2) assert.EqualValues(t, 1, andresult.GetCardinality()) assert.Equal(t, rb2.GetCardinality(), orresult.GetCardinality()) for i := 0; i < 200000; i += 4 { rb.AddInt(i) rb3.AddInt(i) } for i := 200000; i < 400000; i += 114 { rb.AddInt(i) rb3.AddInt(i) } arrayrr := rb.ToArray() arrayrr3 := rb3.ToArray() ok := true for i := range arrayrr { if arrayrr[i] != arrayrr3[i] { ok = false } } assert.Equal(t, len(arrayrr3), len(arrayrr)) assert.True(t, ok) }) t.Run("constainer factory ", func(t *testing.T) { bc1 := newBitmapContainer() bc2 := newBitmapContainer() bc3 := newBitmapContainer() ac1 := newArrayContainer() ac2 := newArrayContainer() ac3 := newArrayContainer() for i := 0; i < 5000; i++ { bc1.iadd(uint16(i * 70)) } for i := 0; i < 5000; i++ { bc2.iadd(uint16(i * 70)) } for i := 0; i < 5000; i++ { bc3.iadd(uint16(i * 70)) } for i := 0; i < 4000; i++ { ac1.iadd(uint16(i * 50)) } for i := 0; i < 4000; i++ { ac2.iadd(uint16(i * 50)) } for i := 0; i < 4000; i++ { ac3.iadd(uint16(i * 50)) } rbc := ac1.clone().(*arrayContainer).toBitmapContainer() assert.True(t, validate(rbc, ac1)) rbc = ac2.clone().(*arrayContainer).toBitmapContainer() assert.True(t, validate(rbc, ac2)) rbc = ac3.clone().(*arrayContainer).toBitmapContainer() assert.True(t, validate(rbc, ac3)) }) t.Run("flipTest1 ", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb.Flip(100000, 200000) // in-place on empty bitmap rbcard := rb.GetCardinality() assert.EqualValues(t, rbcard, 
100000) bs := bitset.New(20000 - 10000) for i := uint(100000); i < 200000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTest1A", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb1 := Flip(rb, 100000, 200000) rbcard := rb1.GetCardinality() assert.EqualValues(t, rbcard, 100000) assert.EqualValues(t, rb.GetCardinality(), 0) bs := bitset.New(0) assert.True(t, equalsBitSet(bs, rb)) for i := uint(100000); i < 200000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb1)) }) t.Run("flipTest2", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb.Flip(100000, 100000) rbcard := rb.GetCardinality() assert.EqualValues(t, rbcard, 0) bs := bitset.New(0) assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTest2A", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb1 := Flip(rb, 100000, 100000) rb.AddInt(1) rbcard := rb1.GetCardinality() assert.EqualValues(t, 0, rbcard) assert.EqualValues(t, rb.GetCardinality(), 1) bs := bitset.New(0) assert.True(t, equalsBitSet(bs, rb1)) bs.Set(1) assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTest3A", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb.Flip(100000, 200000) // got 100k-199999 rb.Flip(100000, 199991) // give back 100k-199990 rbcard := rb.GetCardinality() assert.EqualValues(t, 9, rbcard) bs := bitset.New(0) for i := uint(199991); i < 200000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTest4A", func(t *testing.T) { // fits evenly on both ends rb := NewBitmap() rb.SetCopyOnWrite(true) rb.Flip(100000, 200000) // got 100k-199999 rb.Flip(65536, 4*65536) rbcard := rb.GetCardinality() // 65536 to 99999 are 1s // 200000 to 262143 are 1s: total card assert.EqualValues(t, 96608, rbcard) bs := bitset.New(0) for i := uint(65536); i < 100000; i++ { bs.Set(i) } for i := uint(200000); i < 262144; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTest5", func(t *testing.T) { // fits evenly on small end, multiple // 
containers rb := NewBitmap() rb.SetCopyOnWrite(true) rb.Flip(100000, 132000) rb.Flip(65536, 120000) rbcard := rb.GetCardinality() // 65536 to 99999 are 1s // 120000 to 131999 assert.EqualValues(t, 46464, rbcard) bs := bitset.New(0) for i := uint(65536); i < 100000; i++ { bs.Set(i) } for i := uint(120000); i < 132000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTest6", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb1 := Flip(rb, 100000, 132000) rb2 := Flip(rb1, 65536, 120000) //rbcard := rb2.GetCardinality() bs := bitset.New(0) for i := uint(65536); i < 100000; i++ { bs.Set(i) } for i := uint(120000); i < 132000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb2)) }) t.Run("flipTest6A", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb1 := Flip(rb, 100000, 132000) rb2 := Flip(rb1, 99000, 2*65536) rbcard := rb2.GetCardinality() assert.EqualValues(t, 1928, rbcard) bs := bitset.New(0) for i := uint(99000); i < 100000; i++ { bs.Set(i) } for i := uint(2 * 65536); i < 132000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb2)) }) t.Run("flipTest7", func(t *testing.T) { // within 1 word, first container rb := NewBitmap() rb.SetCopyOnWrite(true) rb.Flip(650, 132000) rb.Flip(648, 651) rbcard := rb.GetCardinality() // 648, 649, 651-131999 assert.EqualValues(t, 132000-651+2, rbcard) bs := bitset.New(0) bs.Set(648) bs.Set(649) for i := uint(651); i < 132000; i++ { bs.Set(i) } assert.True(t, equalsBitSet(bs, rb)) }) t.Run("flipTestBig", func(t *testing.T) { numCases := 1000 rb := NewBitmap() rb.SetCopyOnWrite(true) bs := bitset.New(0) //Random r = new Random(3333); checkTime := 2.0 for i := 0; i < numCases; i++ { start := rand.Intn(65536 * 20) end := rand.Intn(65536 * 20) if rand.Float64() < float64(0.1) { end = start + rand.Intn(100) } rb.Flip(uint64(start), uint64(end)) if start < end { FlipRange(start, end, bs) // throws exception } // otherwise // insert some more ANDs to keep things sparser if rand.Float64() 
< 0.2 { mask := NewBitmap() mask.SetCopyOnWrite(true) mask1 := bitset.New(0) startM := rand.Intn(65536 * 20) endM := startM + 100000 mask.Flip(uint64(startM), uint64(endM)) FlipRange(startM, endM, mask1) mask.Flip(0, 65536*20+100000) FlipRange(0, 65536*20+100000, mask1) rb.And(mask) bs.InPlaceIntersection(mask1) } // see if we can detect incorrectly shared containers if rand.Float64() < 0.1 { irrelevant := Flip(rb, 10, 100000) irrelevant.Flip(5, 200000) irrelevant.Flip(190000, 260000) } if float64(i) > checkTime { assert.True(t, equalsBitSet(bs, rb)) checkTime *= 1.5 } } }) t.Run("ortest", func(t *testing.T) { rr := NewBitmap() rr.SetCopyOnWrite(true) for k := 0; k < 4000; k++ { rr.AddInt(k) } rr.AddInt(100000) rr.AddInt(110000) rr2 := NewBitmap() rr2.SetCopyOnWrite(true) for k := 0; k < 4000; k++ { rr2.AddInt(k) } rror := Or(rr, rr2) array := rror.ToArray() rr.Or(rr2) arrayirr := rr.ToArray() assert.True(t, IntsEquals(array, arrayirr)) }) t.Run("ORtest", func(t *testing.T) { rr := NewBitmap() rr.SetCopyOnWrite(true) for k := 4000; k < 4256; k++ { rr.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr.AddInt(k) } for k := 3 * 65536; k < 3*65536+9000; k++ { rr.AddInt(k) } for k := 4 * 65535; k < 4*65535+7000; k++ { rr.AddInt(k) } for k := 6 * 65535; k < 6*65535+10000; k++ { rr.AddInt(k) } for k := 8 * 65535; k < 8*65535+1000; k++ { rr.AddInt(k) } for k := 9 * 65535; k < 9*65535+30000; k++ { rr.AddInt(k) } rr2 := NewBitmap() rr2.SetCopyOnWrite(true) for k := 4000; k < 4256; k++ { rr2.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr2.AddInt(k) } for k := 3*65536 + 2000; k < 3*65536+6000; k++ { rr2.AddInt(k) } for k := 6 * 65535; k < 6*65535+1000; k++ { rr2.AddInt(k) } for k := 7 * 65535; k < 7*65535+1000; k++ { rr2.AddInt(k) } for k := 10 * 65535; k < 10*65535+5000; k++ { rr2.AddInt(k) } correct := Or(rr, rr2) rr.Or(rr2) assert.True(t, correct.Equals(rr)) }) t.Run("ortest2", func(t *testing.T) { arrayrr := make([]uint32, 4000+4000+2) pos := 0 rr := NewBitmap() 
rr.SetCopyOnWrite(true) for k := 0; k < 4000; k++ { rr.AddInt(k) arrayrr[pos] = uint32(k) pos++ } rr.AddInt(100000) rr.AddInt(110000) rr2 := NewBitmap() rr2.SetCopyOnWrite(true) for k := 4000; k < 8000; k++ { rr2.AddInt(k) arrayrr[pos] = uint32(k) pos++ } arrayrr[pos] = 100000 pos++ arrayrr[pos] = 110000 pos++ rror := Or(rr, rr2) arrayor := rror.ToArray() assert.True(t, IntsEquals(arrayor, arrayrr)) }) t.Run("ortest3", func(t *testing.T) { V1 := make(map[int]bool) V2 := make(map[int]bool) rr := NewBitmap() rr.SetCopyOnWrite(true) rr2 := NewBitmap() rr2.SetCopyOnWrite(true) for k := 0; k < 4000; k++ { rr2.AddInt(k) V1[k] = true } for k := 3500; k < 4500; k++ { rr.AddInt(k) V1[k] = true } for k := 4000; k < 65000; k++ { rr2.AddInt(k) V1[k] = true } // In the second node of each roaring bitmap, we have two bitmap // containers. // So, we will check the union between two BitmapContainers for k := 65536; k < 65536+10000; k++ { rr.AddInt(k) V1[k] = true } for k := 65536; k < 65536+14000; k++ { rr2.AddInt(k) V1[k] = true } // In the 3rd node of each Roaring Bitmap, we have an // ArrayContainer, so, we will try the union between two // ArrayContainers. 
for k := 4 * 65535; k < 4*65535+1000; k++ { rr.AddInt(k) V1[k] = true } for k := 4 * 65535; k < 4*65535+800; k++ { rr2.AddInt(k) V1[k] = true } // For the rest, we will check if the union will take them in // the result for k := 6 * 65535; k < 6*65535+1000; k++ { rr.AddInt(k) V1[k] = true } for k := 7 * 65535; k < 7*65535+2000; k++ { rr2.AddInt(k) V1[k] = true } rror := Or(rr, rr2) valide := true for _, k := range rror.ToArray() { _, found := V1[int(k)] if !found { valide = false } V2[int(k)] = true } for k := range V1 { _, found := V2[k] if !found { valide = false } } assert.True(t, valide) }) t.Run("ortest4", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb2 := NewBitmap() rb2.SetCopyOnWrite(true) for i := 0; i < 200000; i += 4 { rb2.AddInt(i) } for i := 200000; i < 400000; i += 14 { rb2.AddInt(i) } rb2card := rb2.GetCardinality() // check or against an empty bitmap orresult := Or(rb, rb2) off := Or(rb2, rb) assert.True(t, orresult.Equals(off)) assert.Equal(t, orresult.GetCardinality(), rb2card) for i := 500000; i < 600000; i += 14 { rb.AddInt(i) } for i := 200000; i < 400000; i += 3 { rb2.AddInt(i) } // check or against an empty bitmap orresult2 := Or(rb, rb2) assert.Equal(t, orresult.GetCardinality(), rb2card) assert.Equal(t, rb2.GetCardinality()+rb.GetCardinality(), orresult2.GetCardinality()) rb.Or(rb2) assert.True(t, rb.Equals(orresult2)) }) t.Run("randomTest", func(t *testing.T) { rTestCOW(t, 15) rTestCOW(t, 1024) rTestCOW(t, 4096) rTestCOW(t, 65536) rTestCOW(t, 65536*16) }) t.Run("SimpleCardinality", func(t *testing.T) { N := 512 gap := 70 rb := NewBitmap() rb.SetCopyOnWrite(true) for k := 0; k < N; k++ { rb.AddInt(k * gap) assert.EqualValues(t, k+1, rb.GetCardinality()) } assert.EqualValues(t, N, rb.GetCardinality()) for k := 0; k < N; k++ { rb.AddInt(k * gap) assert.EqualValues(t, N, rb.GetCardinality()) } }) t.Run("XORtest", func(t *testing.T) { rr := NewBitmap() rr.SetCopyOnWrite(true) for k := 4000; k < 4256; k++ { rr.AddInt(k) } for 
k := 65536; k < 65536+4000; k++ { rr.AddInt(k) } for k := 3 * 65536; k < 3*65536+9000; k++ { rr.AddInt(k) } for k := 4 * 65535; k < 4*65535+7000; k++ { rr.AddInt(k) } for k := 6 * 65535; k < 6*65535+10000; k++ { rr.AddInt(k) } for k := 8 * 65535; k < 8*65535+1000; k++ { rr.AddInt(k) } for k := 9 * 65535; k < 9*65535+30000; k++ { rr.AddInt(k) } rr2 := NewBitmap() rr2.SetCopyOnWrite(true) for k := 4000; k < 4256; k++ { rr2.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr2.AddInt(k) } for k := 3*65536 + 2000; k < 3*65536+6000; k++ { rr2.AddInt(k) } for k := 6 * 65535; k < 6*65535+1000; k++ { rr2.AddInt(k) } for k := 7 * 65535; k < 7*65535+1000; k++ { rr2.AddInt(k) } for k := 10 * 65535; k < 10*65535+5000; k++ { rr2.AddInt(k) } correct := Xor(rr, rr2) rr.Xor(rr2) assert.True(t, correct.Equals(rr)) }) t.Run("xortest1", func(t *testing.T) { V1 := make(map[int]bool) V2 := make(map[int]bool) rr := NewBitmap() rr.SetCopyOnWrite(true) rr2 := NewBitmap() rr2.SetCopyOnWrite(true) // For the first 65536: rr2 has a bitmap container, and rr has // an array container. // We will check the union between a BitmapCintainer and an // arrayContainer for k := 0; k < 4000; k++ { rr2.AddInt(k) if k < 3500 { V1[k] = true } } for k := 3500; k < 4500; k++ { rr.AddInt(k) } for k := 4000; k < 65000; k++ { rr2.AddInt(k) if k >= 4500 { V1[k] = true } } for k := 65536; k < 65536+30000; k++ { rr.AddInt(k) } for k := 65536; k < 65536+50000; k++ { rr2.AddInt(k) if k >= 65536+30000 { V1[k] = true } } // In the 3rd node of each Roaring Bitmap, we have an // ArrayContainer. So, we will try the union between two // ArrayContainers. 
for k := 4 * 65535; k < 4*65535+1000; k++ { rr.AddInt(k) if k >= (4*65535 + 800) { V1[k] = true } } for k := 4 * 65535; k < 4*65535+800; k++ { rr2.AddInt(k) } for k := 6 * 65535; k < 6*65535+1000; k++ { rr.AddInt(k) V1[k] = true } for k := 7 * 65535; k < 7*65535+2000; k++ { rr2.AddInt(k) V1[k] = true } rrxor := Xor(rr, rr2) valide := true for _, i := range rrxor.ToArray() { _, found := V1[int(i)] if !found { valide = false } V2[int(i)] = true } for k := range V1 { _, found := V2[k] if !found { valide = false } } assert.True(t, valide) }) } func TestXORtest4COW(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb2 := NewBitmap() rb2.SetCopyOnWrite(true) counter := 0 for i := 0; i < 200000; i += 4 { rb2.AddInt(i) counter++ } assert.EqualValues(t, counter, rb2.GetCardinality()) for i := 200000; i < 400000; i += 14 { rb2.AddInt(i) counter++ } assert.EqualValues(t, counter, rb2.GetCardinality()) rb2card := rb2.GetCardinality() assert.EqualValues(t, counter, rb2card) // check or against an empty bitmap xorresult := Xor(rb, rb2) assert.EqualValues(t, counter, xorresult.GetCardinality()) off := Or(rb2, rb) assert.EqualValues(t, counter, off.GetCardinality()) assert.True(t, xorresult.Equals(off)) assert.Equal(t, xorresult.GetCardinality(), rb2card) for i := 500000; i < 600000; i += 14 { rb.AddInt(i) } for i := 200000; i < 400000; i += 3 { rb2.AddInt(i) } // check or against an empty bitmap xorresult2 := Xor(rb, rb2) assert.Equal(t, xorresult.GetCardinality(), rb2card) assert.Equal(t, xorresult2.GetCardinality(), rb2.GetCardinality()+rb.GetCardinality()) rb.Xor(rb2) assert.True(t, xorresult2.Equals(rb)) //need to add the massives } func TestBigRandomCOW(t *testing.T) { t.Run("randomTest", func(t *testing.T) { rTestCOW(t, 15) rTestCOW(t, 100) rTestCOW(t, 512) rTestCOW(t, 1023) rTestCOW(t, 1025) rTestCOW(t, 4095) rTestCOW(t, 4096) rTestCOW(t, 4097) rTestCOW(t, 65536) rTestCOW(t, 65536*16) }) } func rTestCOW(t *testing.T, N int) { log.Println("rtest N=", N) for gap := 
1; gap <= 65536; gap *= 2 { bs1 := bitset.New(0) rb1 := NewBitmap() rb1.SetCopyOnWrite(true) for x := 0; x <= N; x += gap { bs1.Set(uint(x)) rb1.AddInt(x) } assert.EqualValues(t, rb1.GetCardinality(), bs1.Count()) assert.True(t, equalsBitSet(bs1, rb1)) for offset := 1; offset <= gap; offset *= 2 { bs2 := bitset.New(0) rb2 := NewBitmap() rb2.SetCopyOnWrite(true) for x := 0; x <= N; x += gap { bs2.Set(uint(x + offset)) rb2.AddInt(x + offset) } assert.EqualValues(t, rb2.GetCardinality(), bs2.Count()) assert.True(t, equalsBitSet(bs2, rb2)) clonebs1 := bs1.Clone() clonebs1.InPlaceIntersection(bs2) if !equalsBitSet(clonebs1, And(rb1, rb2)) { v := rb1.Clone() v.And(rb2) assert.True(t, equalsBitSet(clonebs1, v)) } // testing OR clonebs1 = bs1.Clone() clonebs1.InPlaceUnion(bs2) assert.True(t, equalsBitSet(clonebs1, Or(rb1, rb2))) // testing XOR clonebs1 = bs1.Clone() clonebs1.InPlaceSymmetricDifference(bs2) assert.True(t, equalsBitSet(clonebs1, Xor(rb1, rb2))) //testing NOTAND clonebs1 = bs1.Clone() clonebs1.InPlaceDifference(bs2) assert.True(t, equalsBitSet(clonebs1, AndNot(rb1, rb2))) } } } func TestRoaringArrayCOW(t *testing.T) { a := newRoaringArray() t.Run("Test Init", func(t *testing.T) { assert.Equal(t, 0, a.size()) }) t.Run("Test Insert", func(t *testing.T) { a.appendContainer(0, newArrayContainer(), false) assert.Equal(t, 1, a.size()) }) t.Run("Test Remove", func(t *testing.T) { a.remove(0) assert.Equal(t, 0, a.size()) }) t.Run("Test popcount Full", func(t *testing.T) { res := popcount(uint64(0xffffffffffffffff)) assert.EqualValues(t, 64, res) }) t.Run("Test popcount Empty", func(t *testing.T) { res := popcount(0) assert.EqualValues(t, 0, res) }) t.Run("Test popcount 16", func(t *testing.T) { res := popcount(0xff00ff) assert.EqualValues(t, 16, res) }) t.Run("Test ArrayContainer Add", func(t *testing.T) { ar := newArrayContainer() ar.iadd(1) assert.EqualValues(t, 1, ar.getCardinality()) }) t.Run("Test ArrayContainer Add wacky", func(t *testing.T) { ar := 
newArrayContainer() ar.iadd(0) ar.iadd(5000) assert.EqualValues(t, 2, ar.getCardinality()) }) t.Run("Test ArrayContainer Add Reverse", func(t *testing.T) { ar := newArrayContainer() ar.iadd(5000) ar.iadd(2048) ar.iadd(0) assert.EqualValues(t, 3, ar.getCardinality()) }) t.Run("Test BitmapContainer Add ", func(t *testing.T) { bm := newBitmapContainer() bm.iadd(0) assert.EqualValues(t, 1, bm.getCardinality()) }) } func TestFlipBigACOW(t *testing.T) { numCases := 1000 bs := bitset.New(0) checkTime := 2.0 rb1 := NewBitmap() rb1.SetCopyOnWrite(true) rb2 := NewBitmap() rb2.SetCopyOnWrite(true) for i := 0; i < numCases; i++ { start := rand.Intn(65536 * 20) end := rand.Intn(65536 * 20) if rand.Float64() < 0.1 { end = start + rand.Intn(100) } if (i & 1) == 0 { rb2 = FlipInt(rb1, start, end) // tweak the other, catch bad sharing rb1.FlipInt(rand.Intn(65536*20), rand.Intn(65536*20)) } else { rb1 = FlipInt(rb2, start, end) rb2.FlipInt(rand.Intn(65536*20), rand.Intn(65536*20)) } if start < end { FlipRange(start, end, bs) // throws exception } // otherwise // insert some more ANDs to keep things sparser if (rand.Float64() < 0.2) && (i&1) == 0 { mask := NewBitmap() mask.SetCopyOnWrite(true) mask1 := bitset.New(0) startM := rand.Intn(65536 * 20) endM := startM + 100000 mask.FlipInt(startM, endM) FlipRange(startM, endM, mask1) mask.FlipInt(0, 65536*20+100000) FlipRange(0, 65536*20+100000, mask1) rb2.And(mask) bs.InPlaceIntersection(mask1) } if float64(i) > checkTime { var rb *Bitmap if (i & 1) == 0 { rb = rb2 } else { rb = rb1 } assert.True(t, equalsBitSet(bs, rb)) checkTime *= 1.5 } } } func TestDoubleAddCOW(t *testing.T) { t.Run("doubleadd ", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb.AddRange(65533, 65536) rb.AddRange(65530, 65536) rb2 := NewBitmap() rb2.SetCopyOnWrite(true) rb2.AddRange(65530, 65536) assert.True(t, rb.Equals(rb2)) rb2.RemoveRange(65530, 65536) assert.EqualValues(t, 0, rb2.GetCardinality()) }) t.Run("doubleadd2 ", func(t *testing.T) { rb := 
NewBitmap() rb.SetCopyOnWrite(true) rb.AddRange(65533, 65536*20) rb.AddRange(65530, 65536*20) rb2 := NewBitmap() rb2.SetCopyOnWrite(true) rb2.AddRange(65530, 65536*20) assert.True(t, rb.Equals(rb2)) rb2.RemoveRange(65530, 65536*20) assert.EqualValues(t, 0, rb2.GetCardinality()) }) t.Run("doubleadd3 ", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb.AddRange(65533, 65536*20+10) rb.AddRange(65530, 65536*20+10) rb2 := NewBitmap() rb2.SetCopyOnWrite(true) rb2.AddRange(65530, 65536*20+10) assert.True(t, rb.Equals(rb2)) rb2.RemoveRange(65530, 65536*20+1) assert.EqualValues(t, 9, rb2.GetCardinality()) }) t.Run("doubleadd4 ", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb.AddRange(65533, 65536*20) rb.RemoveRange(65533+5, 65536*20) assert.EqualValues(t, 5, rb.GetCardinality()) }) t.Run("doubleadd5 ", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb.AddRange(65533, 65536*20) rb.RemoveRange(65533+5, 65536*20-5) assert.EqualValues(t, 10, rb.GetCardinality()) }) t.Run("doubleadd6 ", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb.AddRange(65533, 65536*20-5) rb.RemoveRange(65533+5, 65536*20-10) assert.EqualValues(t, 10, rb.GetCardinality()) }) t.Run("doubleadd7 ", func(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb.AddRange(65533, 65536*20+1) rb.RemoveRange(65533+1, 65536*20) assert.EqualValues(t, 2, rb.GetCardinality()) }) t.Run("AndNotBug01 ", func(t *testing.T) { rb1 := NewBitmap() rb1.SetCopyOnWrite(true) rb1.AddRange(0, 60000) rb2 := NewBitmap() rb2.SetCopyOnWrite(true) rb2.AddRange(60000-10, 60000+10) rb2.AndNot(rb1) rb3 := NewBitmap() rb3.SetCopyOnWrite(true) rb3.AddRange(60000, 60000+10) assert.True(t, rb2.Equals(rb3)) }) } func TestAndNotCOW(t *testing.T) { rr := NewBitmap() rr.SetCopyOnWrite(true) for k := 4000; k < 4256; k++ { rr.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr.AddInt(k) } for k := 3 * 65536; k < 3*65536+9000; k++ { rr.AddInt(k) } for k := 4 * 65535; k < 
4*65535+7000; k++ { rr.AddInt(k) } for k := 6 * 65535; k < 6*65535+10000; k++ { rr.AddInt(k) } for k := 8 * 65535; k < 8*65535+1000; k++ { rr.AddInt(k) } for k := 9 * 65535; k < 9*65535+30000; k++ { rr.AddInt(k) } rr2 := NewBitmap() rr2.SetCopyOnWrite(true) for k := 4000; k < 4256; k++ { rr2.AddInt(k) } for k := 65536; k < 65536+4000; k++ { rr2.AddInt(k) } for k := 3*65536 + 2000; k < 3*65536+6000; k++ { rr2.AddInt(k) } for k := 6 * 65535; k < 6*65535+1000; k++ { rr2.AddInt(k) } for k := 7 * 65535; k < 7*65535+1000; k++ { rr2.AddInt(k) } for k := 10 * 65535; k < 10*65535+5000; k++ { rr2.AddInt(k) } correct := AndNot(rr, rr2) rr.AndNot(rr2) assert.True(t, correct.Equals(rr)) } func TestStatsCOW(t *testing.T) { t.Run("Test Stats with empty bitmap", func(t *testing.T) { expectedStats := Statistics{} rr := NewBitmap() rr.SetCopyOnWrite(true) assert.EqualValues(t, expectedStats, rr.Stats()) }) t.Run("Test Stats with bitmap Container", func(t *testing.T) { // Given a bitmap that should have a single bitmap container expectedStats := Statistics{ Cardinality: 60000, Containers: 1, BitmapContainers: 1, BitmapContainerValues: 60000, BitmapContainerBytes: 8192, RunContainers: 0, RunContainerBytes: 0, RunContainerValues: 0, } rr := NewBitmap() rr.SetCopyOnWrite(true) for i := uint32(0); i < 60000; i++ { rr.Add(i) } assert.EqualValues(t, expectedStats, rr.Stats()) }) t.Run("Test Stats with run Container", func(t *testing.T) { // Given that we should have a single run container intSize := int(unsafe.Sizeof(int(0))) var runContainerBytes uint64 if intSize == 4 { runContainerBytes = 40 } else { runContainerBytes = 52 } expectedStats := Statistics{ Cardinality: 60000, Containers: 1, BitmapContainers: 0, BitmapContainerValues: 0, BitmapContainerBytes: 0, RunContainers: 1, RunContainerBytes: runContainerBytes, RunContainerValues: 60000, } rr := NewBitmap() rr.SetCopyOnWrite(true) rr.AddRange(0, 60000) assert.EqualValues(t, expectedStats, rr.Stats()) }) t.Run("Test Stats with Array 
Container", func(t *testing.T) { // Given a bitmap that should have a single array container expectedStats := Statistics{ Cardinality: 2, Containers: 1, ArrayContainers: 1, ArrayContainerValues: 2, ArrayContainerBytes: 4, } rr := NewBitmap() rr.SetCopyOnWrite(true) rr.Add(2) rr.Add(4) assert.EqualValues(t, expectedStats, rr.Stats()) }) } func TestFlipVerySmallCOW(t *testing.T) { rb := NewBitmap() rb.SetCopyOnWrite(true) rb.Flip(0, 10) // got [0,9], card is 10 rb.Flip(0, 1) // give back the number 0, card goes to 9 rbcard := rb.GetCardinality() assert.EqualValues(t, 9, rbcard) } func TestCloneCOWContainers(t *testing.T) { rb := NewBitmap() rb.AddRange(0, 3000) buf := &bytes.Buffer{} rb.WriteTo(buf) newRb1 := NewBitmap() newRb1.FromBuffer(buf.Bytes()) newRb1.CloneCopyOnWriteContainers() rb2 := NewBitmap() rb2.AddRange(3000, 6000) buf.Reset() rb2.WriteTo(buf) assert.EqualValues(t, rb.ToArray(), newRb1.ToArray()) } roaring-0.4.21/runcontainer.go 0000664 0000000 0000000 00000176710 13542657257 0016331 0 ustar 00root root 0000000 0000000 package roaring // // Copyright (c) 2016 by the roaring authors. // Licensed under the Apache License, Version 2.0. // // We derive a few lines of code from the sort.Search // function in the golang standard library. That function // is Copyright 2009 The Go Authors, and licensed // under the following BSD-style license. /* Copyright (c) 2009 The Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. 
nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ import ( "fmt" "sort" "unsafe" ) //go:generate msgp -unexported // runContainer16 does run-length encoding of sets of // uint16 integers. type runContainer16 struct { iv []interval16 card int64 // avoid allocation during search myOpts searchOptions `msg:"-"` } // interval16 is the internal to runContainer16 // structure that maintains the individual [start, last] // closed intervals. type interval16 struct { start uint16 length uint16 // length minus 1 } func newInterval16Range(start, last uint16) interval16 { if last < start { panic(fmt.Sprintf("last (%d) cannot be smaller than start (%d)", last, start)) } return interval16{ start, last - start, } } // runlen returns the count of integers in the interval. func (iv interval16) runlen() int64 { return int64(iv.length) + 1 } func (iv interval16) last() uint16 { return iv.start + iv.length } // String produces a human viewable string of the contents. 
func (iv interval16) String() string { return fmt.Sprintf("[%d, %d]", iv.start, iv.length) } func ivalString16(iv []interval16) string { var s string var j int var p interval16 for j, p = range iv { s += fmt.Sprintf("%v:[%d, %d], ", j, p.start, p.last()) } return s } // String produces a human viewable string of the contents. func (rc *runContainer16) String() string { if len(rc.iv) == 0 { return "runContainer16{}" } is := ivalString16(rc.iv) return `runContainer16{` + is + `}` } // uint16Slice is a sort.Sort convenience method type uint16Slice []uint16 // Len returns the length of p. func (p uint16Slice) Len() int { return len(p) } // Less returns p[i] < p[j] func (p uint16Slice) Less(i, j int) bool { return p[i] < p[j] } // Swap swaps elements i and j. func (p uint16Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } //msgp:ignore addHelper // addHelper helps build a runContainer16. type addHelper16 struct { runstart uint16 runlen uint16 actuallyAdded uint16 m []interval16 rc *runContainer16 } func (ah *addHelper16) storeIval(runstart, runlen uint16) { mi := interval16{start: runstart, length: runlen} ah.m = append(ah.m, mi) } func (ah *addHelper16) add(cur, prev uint16, i int) { if cur == prev+1 { ah.runlen++ ah.actuallyAdded++ } else { if cur < prev { panic(fmt.Sprintf("newRunContainer16FromVals sees "+ "unsorted vals; vals[%v]=cur=%v < prev=%v. Sort your vals"+ " before calling us with alreadySorted == true.", i, cur, prev)) } if cur == prev { // ignore duplicates } else { ah.actuallyAdded++ ah.storeIval(ah.runstart, ah.runlen) ah.runstart = cur ah.runlen = 0 } } } // newRunContainerRange makes a new container made of just the specified closed interval [rangestart,rangelast] func newRunContainer16Range(rangestart uint16, rangelast uint16) *runContainer16 { rc := &runContainer16{} rc.iv = append(rc.iv, newInterval16Range(rangestart, rangelast)) return rc } // newRunContainer16FromVals makes a new container from vals. 
// // For efficiency, vals should be sorted in ascending order. // Ideally vals should not contain duplicates, but we detect and // ignore them. If vals is already sorted in ascending order, then // pass alreadySorted = true. Otherwise, for !alreadySorted, // we will sort vals before creating a runContainer16 of them. // We sort the original vals, so this will change what the // caller sees in vals as a side effect. func newRunContainer16FromVals(alreadySorted bool, vals ...uint16) *runContainer16 { // keep this in sync with newRunContainer16FromArray below rc := &runContainer16{} ah := addHelper16{rc: rc} if !alreadySorted { sort.Sort(uint16Slice(vals)) } n := len(vals) var cur, prev uint16 switch { case n == 0: // nothing more case n == 1: ah.m = append(ah.m, newInterval16Range(vals[0], vals[0])) ah.actuallyAdded++ default: ah.runstart = vals[0] ah.actuallyAdded++ for i := 1; i < n; i++ { prev = vals[i-1] cur = vals[i] ah.add(cur, prev, i) } ah.storeIval(ah.runstart, ah.runlen) } rc.iv = ah.m rc.card = int64(ah.actuallyAdded) return rc } // newRunContainer16FromBitmapContainer makes a new run container from bc, // somewhat efficiently. 
For reference, see the Java // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192 func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 { rc := &runContainer16{} nbrRuns := bc.numberOfRuns() if nbrRuns == 0 { return rc } rc.iv = make([]interval16, nbrRuns) longCtr := 0 // index of current long in bitmap curWord := bc.bitmap[0] // its value runCount := 0 for { // potentially multiword advance to first 1 bit for curWord == 0 && longCtr < len(bc.bitmap)-1 { longCtr++ curWord = bc.bitmap[longCtr] } if curWord == 0 { // wrap up, no more runs return rc } localRunStart := countTrailingZeros(curWord) runStart := localRunStart + 64*longCtr // stuff 1s into number's LSBs curWordWith1s := curWord | (curWord - 1) // find the next 0, potentially in a later word runEnd := 0 for curWordWith1s == maxWord && longCtr < len(bc.bitmap)-1 { longCtr++ curWordWith1s = bc.bitmap[longCtr] } if curWordWith1s == maxWord { // a final unterminated run of 1s runEnd = wordSizeInBits + longCtr*64 rc.iv[runCount].start = uint16(runStart) rc.iv[runCount].length = uint16(runEnd) - uint16(runStart) - 1 return rc } localRunEnd := countTrailingZeros(^curWordWith1s) runEnd = localRunEnd + longCtr*64 rc.iv[runCount].start = uint16(runStart) rc.iv[runCount].length = uint16(runEnd) - 1 - uint16(runStart) runCount++ // now, zero out everything right of runEnd. curWord = curWordWith1s & (curWordWith1s + 1) // We've lathered and rinsed, so repeat... } } // // newRunContainer16FromArray populates a new // runContainer16 from the contents of arr. 
// func newRunContainer16FromArray(arr *arrayContainer) *runContainer16 { // keep this in sync with newRunContainer16FromVals above rc := &runContainer16{} ah := addHelper16{rc: rc} n := arr.getCardinality() var cur, prev uint16 switch { case n == 0: // nothing more case n == 1: ah.m = append(ah.m, newInterval16Range(arr.content[0], arr.content[0])) ah.actuallyAdded++ default: ah.runstart = arr.content[0] ah.actuallyAdded++ for i := 1; i < n; i++ { prev = arr.content[i-1] cur = arr.content[i] ah.add(cur, prev, i) } ah.storeIval(ah.runstart, ah.runlen) } rc.iv = ah.m rc.card = int64(ah.actuallyAdded) return rc } // set adds the integers in vals to the set. Vals // must be sorted in increasing order; if not, you should set // alreadySorted to false, and we will sort them in place for you. // (Be aware of this side effect -- it will affect the callers // view of vals). // // If you have a small number of additions to an already // big runContainer16, calling Add() may be faster. func (rc *runContainer16) set(alreadySorted bool, vals ...uint16) { rc2 := newRunContainer16FromVals(alreadySorted, vals...) un := rc.union(rc2) rc.iv = un.iv rc.card = 0 } // canMerge returns true iff the intervals // a and b either overlap or they are // contiguous and so can be merged into // a single interval. func canMerge16(a, b interval16) bool { if int64(a.last())+1 < int64(b.start) { return false } return int64(b.last())+1 >= int64(a.start) } // haveOverlap differs from canMerge in that // it tells you if the intersection of a // and b would contain an element (otherwise // it would be the empty set, and we return // false). func haveOverlap16(a, b interval16) bool { if int64(a.last())+1 <= int64(b.start) { return false } return int64(b.last())+1 > int64(a.start) } // mergeInterval16s joins a and b into a // new interval, and panics if it cannot. 
func mergeInterval16s(a, b interval16) (res interval16) { if !canMerge16(a, b) { panic(fmt.Sprintf("cannot merge %#v and %#v", a, b)) } if b.start < a.start { res.start = b.start } else { res.start = a.start } if b.last() > a.last() { res.length = b.last() - res.start } else { res.length = a.last() - res.start } return } // intersectInterval16s returns the intersection // of a and b. The isEmpty flag will be true if // a and b were disjoint. func intersectInterval16s(a, b interval16) (res interval16, isEmpty bool) { if !haveOverlap16(a, b) { isEmpty = true return } if b.start > a.start { res.start = b.start } else { res.start = a.start } bEnd := b.last() aEnd := a.last() var resEnd uint16 if bEnd < aEnd { resEnd = bEnd } else { resEnd = aEnd } res.length = resEnd - res.start return } // union merges two runContainer16s, producing // a new runContainer16 with the union of rc and b. func (rc *runContainer16) union(b *runContainer16) *runContainer16 { // rc is also known as 'a' here, but golint insisted we // call it rc for consistency with the rest of the methods. var m []interval16 alim := int64(len(rc.iv)) blim := int64(len(b.iv)) var na int64 // next from a var nb int64 // next from b // merged holds the current merge output, which might // get additional merges before being appended to m. var merged interval16 var mergedUsed bool // is merged being used at the moment? 
var cura interval16 // currently considering this interval16 from a var curb interval16 // currently considering this interval16 from b pass := 0 for na < alim && nb < blim { pass++ cura = rc.iv[na] curb = b.iv[nb] if mergedUsed { mergedUpdated := false if canMerge16(cura, merged) { merged = mergeInterval16s(cura, merged) na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) mergedUpdated = true } if canMerge16(curb, merged) { merged = mergeInterval16s(curb, merged) nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) mergedUpdated = true } if !mergedUpdated { // we know that merged is disjoint from cura and curb m = append(m, merged) mergedUsed = false } continue } else { // !mergedUsed if !canMerge16(cura, curb) { if cura.start < curb.start { m = append(m, cura) na++ } else { m = append(m, curb) nb++ } } else { merged = mergeInterval16s(cura, curb) mergedUsed = true na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) } } } var aDone, bDone bool if na >= alim { aDone = true } if nb >= blim { bDone = true } // finish by merging anything remaining into merged we can: if mergedUsed { if !aDone { aAdds: for na < alim { cura = rc.iv[na] if canMerge16(cura, merged) { merged = mergeInterval16s(cura, merged) na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) } else { break aAdds } } } if !bDone { bAdds: for nb < blim { curb = b.iv[nb] if canMerge16(curb, merged) { merged = mergeInterval16s(curb, merged) nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) } else { break bAdds } } } m = append(m, merged) } if na < alim { m = append(m, rc.iv[na:]...) } if nb < blim { m = append(m, b.iv[nb:]...) } res := &runContainer16{iv: m} return res } // unionCardinality returns the cardinality of the merger of two runContainer16s, the union of rc and b. 
func (rc *runContainer16) unionCardinality(b *runContainer16) uint64 { // rc is also known as 'a' here, but golint insisted we // call it rc for consistency with the rest of the methods. answer := uint64(0) alim := int64(len(rc.iv)) blim := int64(len(b.iv)) var na int64 // next from a var nb int64 // next from b // merged holds the current merge output, which might // get additional merges before being appended to m. var merged interval16 var mergedUsed bool // is merged being used at the moment? var cura interval16 // currently considering this interval16 from a var curb interval16 // currently considering this interval16 from b pass := 0 for na < alim && nb < blim { pass++ cura = rc.iv[na] curb = b.iv[nb] if mergedUsed { mergedUpdated := false if canMerge16(cura, merged) { merged = mergeInterval16s(cura, merged) na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) mergedUpdated = true } if canMerge16(curb, merged) { merged = mergeInterval16s(curb, merged) nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) mergedUpdated = true } if !mergedUpdated { // we know that merged is disjoint from cura and curb //m = append(m, merged) answer += uint64(merged.last()) - uint64(merged.start) + 1 mergedUsed = false } continue } else { // !mergedUsed if !canMerge16(cura, curb) { if cura.start < curb.start { answer += uint64(cura.last()) - uint64(cura.start) + 1 //m = append(m, cura) na++ } else { answer += uint64(curb.last()) - uint64(curb.start) + 1 //m = append(m, curb) nb++ } } else { merged = mergeInterval16s(cura, curb) mergedUsed = true na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) } } } var aDone, bDone bool if na >= alim { aDone = true } if nb >= blim { bDone = true } // finish by merging anything remaining into merged we can: if mergedUsed { if !aDone { aAdds: for na < alim { cura = rc.iv[na] if canMerge16(cura, merged) { merged = mergeInterval16s(cura, merged) na = 
rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) } else { break aAdds } } } if !bDone { bAdds: for nb < blim { curb = b.iv[nb] if canMerge16(curb, merged) { merged = mergeInterval16s(curb, merged) nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) } else { break bAdds } } } //m = append(m, merged) answer += uint64(merged.last()) - uint64(merged.start) + 1 } for _, r := range rc.iv[na:] { answer += uint64(r.last()) - uint64(r.start) + 1 } for _, r := range b.iv[nb:] { answer += uint64(r.last()) - uint64(r.start) + 1 } return answer } // indexOfIntervalAtOrAfter is a helper for union. func (rc *runContainer16) indexOfIntervalAtOrAfter(key int64, startIndex int64) int64 { rc.myOpts.startIndex = startIndex rc.myOpts.endxIndex = 0 w, already, _ := rc.search(key, &rc.myOpts) if already { return w } return w + 1 } // intersect returns a new runContainer16 holding the // intersection of rc (also known as 'a') and b. func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 { a := rc numa := int64(len(a.iv)) numb := int64(len(b.iv)) res := &runContainer16{} if numa == 0 || numb == 0 { return res } if numa == 1 && numb == 1 { if !haveOverlap16(a.iv[0], b.iv[0]) { return res } } var output []interval16 var acuri int64 var bcuri int64 astart := int64(a.iv[acuri].start) bstart := int64(b.iv[bcuri].start) var intersection interval16 var leftoverstart int64 var isOverlap, isLeftoverA, isLeftoverB bool var done bool toploop: for acuri < numa && bcuri < numb { isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover16(astart, int64(a.iv[acuri].last()), bstart, int64(b.iv[bcuri].last())) if !isOverlap { switch { case astart < bstart: acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart) if done { break toploop } astart = int64(a.iv[acuri].start) case astart > bstart: bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart) if done { break toploop } bstart = 
int64(b.iv[bcuri].start) //default: // panic("impossible that astart == bstart, since !isOverlap") } } else { // isOverlap output = append(output, intersection) switch { case isLeftoverA: // note that we change astart without advancing acuri, // since we need to capture any 2ndary intersections with a.iv[acuri] astart = leftoverstart bcuri++ if bcuri >= numb { break toploop } bstart = int64(b.iv[bcuri].start) case isLeftoverB: // note that we change bstart without advancing bcuri, // since we need to capture any 2ndary intersections with b.iv[bcuri] bstart = leftoverstart acuri++ if acuri >= numa { break toploop } astart = int64(a.iv[acuri].start) default: // neither had leftover, both completely consumed // optionally, assert for sanity: //if a.iv[acuri].endx != b.iv[bcuri].endx { // panic("huh? should only be possible that endx agree now!") //} // advance to next a interval acuri++ if acuri >= numa { break toploop } astart = int64(a.iv[acuri].start) // advance to next b interval bcuri++ if bcuri >= numb { break toploop } bstart = int64(b.iv[bcuri].start) } } } // end for toploop if len(output) == 0 { return res } res.iv = output return res } // intersectCardinality returns the cardinality of the // intersection of rc (also known as 'a') and b. 
func (rc *runContainer16) intersectCardinality(b *runContainer16) int64 { answer := int64(0) a := rc numa := int64(len(a.iv)) numb := int64(len(b.iv)) if numa == 0 || numb == 0 { return 0 } if numa == 1 && numb == 1 { if !haveOverlap16(a.iv[0], b.iv[0]) { return 0 } } var acuri int64 var bcuri int64 astart := int64(a.iv[acuri].start) bstart := int64(b.iv[bcuri].start) var intersection interval16 var leftoverstart int64 var isOverlap, isLeftoverA, isLeftoverB bool var done bool pass := 0 toploop: for acuri < numa && bcuri < numb { pass++ isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover16(astart, int64(a.iv[acuri].last()), bstart, int64(b.iv[bcuri].last())) if !isOverlap { switch { case astart < bstart: acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart) if done { break toploop } astart = int64(a.iv[acuri].start) case astart > bstart: bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart) if done { break toploop } bstart = int64(b.iv[bcuri].start) //default: // panic("impossible that astart == bstart, since !isOverlap") } } else { // isOverlap answer += int64(intersection.last()) - int64(intersection.start) + 1 switch { case isLeftoverA: // note that we change astart without advancing acuri, // since we need to capture any 2ndary intersections with a.iv[acuri] astart = leftoverstart bcuri++ if bcuri >= numb { break toploop } bstart = int64(b.iv[bcuri].start) case isLeftoverB: // note that we change bstart without advancing bcuri, // since we need to capture any 2ndary intersections with b.iv[bcuri] bstart = leftoverstart acuri++ if acuri >= numa { break toploop } astart = int64(a.iv[acuri].start) default: // neither had leftover, both completely consumed // optionally, assert for sanity: //if a.iv[acuri].endx != b.iv[bcuri].endx { // panic("huh? 
should only be possible that endx agree now!") //} // advance to next a interval acuri++ if acuri >= numa { break toploop } astart = int64(a.iv[acuri].start) // advance to next b interval bcuri++ if bcuri >= numb { break toploop } bstart = int64(b.iv[bcuri].start) } } } // end for toploop return answer } // get returns true iff key is in the container. func (rc *runContainer16) contains(key uint16) bool { _, in, _ := rc.search(int64(key), nil) return in } // numIntervals returns the count of intervals in the container. func (rc *runContainer16) numIntervals() int { return len(rc.iv) } // searchOptions allows us to accelerate search with // prior knowledge of (mostly lower) bounds. This is used by Union // and Intersect. type searchOptions struct { // start here instead of at 0 startIndex int64 // upper bound instead of len(rc.iv); // endxIndex == 0 means ignore the bound and use // endxIndex == n ==len(rc.iv) which is also // naturally the default for search() // when opt = nil. endxIndex int64 } // search returns alreadyPresent to indicate if the // key is already in one of our interval16s. // // If key is alreadyPresent, then whichInterval16 tells // you where. // // If key is not already present, then whichInterval16 is // set as follows: // // a) whichInterval16 == len(rc.iv)-1 if key is beyond our // last interval16 in rc.iv; // // b) whichInterval16 == -1 if key is before our first // interval16 in rc.iv; // // c) whichInterval16 is set to the minimum index of rc.iv // which comes strictly before the key; // so rc.iv[whichInterval16].last < key, // and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start // (Note that whichInterval16+1 won't exist when // whichInterval16 is the last interval.) // // runContainer16.search always returns whichInterval16 < len(rc.iv). // // If not nil, opts can be used to further restrict // the search space. 
// func (rc *runContainer16) search(key int64, opts *searchOptions) (whichInterval16 int64, alreadyPresent bool, numCompares int) { n := int64(len(rc.iv)) if n == 0 { return -1, false, 0 } startIndex := int64(0) endxIndex := n if opts != nil { startIndex = opts.startIndex // let endxIndex == 0 mean no effect if opts.endxIndex > 0 { endxIndex = opts.endxIndex } } // sort.Search returns the smallest index i // in [0, n) at which f(i) is true, assuming that on the range [0, n), // f(i) == true implies f(i+1) == true. // If there is no such index, Search returns n. // For correctness, this began as verbatim snippet from // sort.Search in the Go standard lib. // We inline our comparison function for speed, and // annotate with numCompares // to observe and test that extra bounds are utilized. i, j := startIndex, endxIndex for i < j { h := i + (j-i)/2 // avoid overflow when computing h as the bisector // i <= h < j numCompares++ if !(key < int64(rc.iv[h].start)) { i = h + 1 } else { j = h } } below := i // end std lib snippet. // The above is a simple in-lining and annotation of: /* below := sort.Search(n, func(i int) bool { return key < rc.iv[i].start }) */ whichInterval16 = below - 1 if below == n { // all falses => key is >= start of all interval16s // ... so does it belong to the last interval16? if key < int64(rc.iv[n-1].last())+1 { // yes, it belongs to the last interval16 alreadyPresent = true return } // no, it is beyond the last interval16. // leave alreadyPreset = false return } // INVAR: key is below rc.iv[below] if below == 0 { // key is before the first first interval16. // leave alreadyPresent = false return } // INVAR: key is >= rc.iv[below-1].start and // key is < rc.iv[below].start // is key in below-1 interval16? if key >= int64(rc.iv[below-1].start) && key < int64(rc.iv[below-1].last())+1 { // yes, it is. key is in below-1 interval16. 
alreadyPresent = true return } // INVAR: key >= rc.iv[below-1].endx && key < rc.iv[below].start // leave alreadyPresent = false return } // cardinality returns the count of the integers stored in the // runContainer16. func (rc *runContainer16) cardinality() int64 { if len(rc.iv) == 0 { rc.card = 0 return 0 } if rc.card > 0 { return rc.card // already cached } // have to compute it var n int64 for _, p := range rc.iv { n += p.runlen() } rc.card = n // cache it return n } // AsSlice decompresses the contents into a []uint16 slice. func (rc *runContainer16) AsSlice() []uint16 { s := make([]uint16, rc.cardinality()) j := 0 for _, p := range rc.iv { for i := p.start; i <= p.last(); i++ { s[j] = i j++ } } return s } // newRunContainer16 creates an empty run container. func newRunContainer16() *runContainer16 { return &runContainer16{} } // newRunContainer16CopyIv creates a run container, initializing // with a copy of the supplied iv slice. // func newRunContainer16CopyIv(iv []interval16) *runContainer16 { rc := &runContainer16{ iv: make([]interval16, len(iv)), } copy(rc.iv, iv) return rc } func (rc *runContainer16) Clone() *runContainer16 { rc2 := newRunContainer16CopyIv(rc.iv) return rc2 } // newRunContainer16TakeOwnership returns a new runContainer16 // backed by the provided iv slice, which we will // assume exclusive control over from now on. // func newRunContainer16TakeOwnership(iv []interval16) *runContainer16 { rc := &runContainer16{ iv: iv, } return rc } const baseRc16Size = int(unsafe.Sizeof(runContainer16{})) const perIntervalRc16Size = int(unsafe.Sizeof(interval16{})) const baseDiskRc16Size = int(unsafe.Sizeof(uint16(0))) // see also runContainer16SerializedSizeInBytes(numRuns int) int // getSizeInBytes returns the number of bytes of memory // required by this runContainer16. 
func (rc *runContainer16) getSizeInBytes() int { return perIntervalRc16Size*len(rc.iv) + baseRc16Size } // runContainer16SerializedSizeInBytes returns the number of bytes of disk // required to hold numRuns in a runContainer16. func runContainer16SerializedSizeInBytes(numRuns int) int { return perIntervalRc16Size*numRuns + baseDiskRc16Size } // Add adds a single value k to the set. func (rc *runContainer16) Add(k uint16) (wasNew bool) { // TODO comment from runContainer16.java: // it might be better and simpler to do return // toBitmapOrArrayContainer(getCardinality()).add(k) // but note that some unit tests use this method to build up test // runcontainers without calling runOptimize k64 := int64(k) index, present, _ := rc.search(k64, nil) if present { return // already there } wasNew = true // increment card if it is cached already if rc.card > 0 { rc.card++ } n := int64(len(rc.iv)) if index == -1 { // we may need to extend the first run if n > 0 { if rc.iv[0].start == k+1 { rc.iv[0].start = k rc.iv[0].length++ return } } // nope, k stands alone, starting the new first interval16. rc.iv = append([]interval16{newInterval16Range(k, k)}, rc.iv...) return } // are we off the end? handle both index == n and index == n-1: if index >= n-1 { if int64(rc.iv[n-1].last())+1 == k64 { rc.iv[n-1].length++ return } rc.iv = append(rc.iv, newInterval16Range(k, k)) return } // INVAR: index and index+1 both exist, and k goes between them. // // Now: add k into the middle, // possibly fusing with index or index+1 interval16 // and possibly resulting in fusing of two interval16s // that had a one integer gap. left := index right := index + 1 // are we fusing left and right by adding k? if int64(rc.iv[left].last())+1 == k64 && int64(rc.iv[right].start) == k64+1 { // fuse into left rc.iv[left].length = rc.iv[right].last() - rc.iv[left].start // remove redundant right rc.iv = append(rc.iv[:left+1], rc.iv[right+1:]...) return } // are we an addition to left? 
if int64(rc.iv[left].last())+1 == k64 { // yes rc.iv[left].length++ return } // are we an addition to right? if int64(rc.iv[right].start) == k64+1 { // yes rc.iv[right].start = k rc.iv[right].length++ return } // k makes a standalone new interval16, inserted in the middle tail := append([]interval16{newInterval16Range(k, k)}, rc.iv[right:]...) rc.iv = append(rc.iv[:left+1], tail...) return } //msgp:ignore runIterator // runIterator16 advice: you must call hasNext() // before calling next()/peekNext() to insure there are contents. type runIterator16 struct { rc *runContainer16 curIndex int64 curPosInIndex uint16 } // newRunIterator16 returns a new empty run container. func (rc *runContainer16) newRunIterator16() *runIterator16 { return &runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0} } // hasNext returns false if calling next will panic. It // returns true when there is at least one more value // available in the iteration sequence. func (ri *runIterator16) hasNext() bool { return int64(len(ri.rc.iv)) > ri.curIndex+1 || (int64(len(ri.rc.iv)) == ri.curIndex+1 && ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex) } // next returns the next value in the iteration sequence. 
func (ri *runIterator16) next() uint16 { next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex if ri.curPosInIndex == ri.rc.iv[ri.curIndex].length { ri.curPosInIndex = 0 ri.curIndex++ } else { ri.curPosInIndex++ } return next } // peekNext returns the next value in the iteration sequence without advancing the iterator func (ri *runIterator16) peekNext() uint16 { return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex } // advanceIfNeeded advances as long as the next value is smaller than minval func (ri *runIterator16) advanceIfNeeded(minval uint16) { if !ri.hasNext() || ri.peekNext() >= minval { return } opt := &searchOptions{ startIndex: ri.curIndex, endxIndex: int64(len(ri.rc.iv)), } // interval cannot be -1 because of minval > peekNext interval, isPresent, _ := ri.rc.search(int64(minval), opt) // if the minval is present, set the curPosIndex at the right position if isPresent { ri.curIndex = interval ri.curPosInIndex = minval - ri.rc.iv[ri.curIndex].start } else { // otherwise interval is set to to the minimum index of rc.iv // which comes strictly before the key, that's why we set the next interval ri.curIndex = interval + 1 ri.curPosInIndex = 0 } } // runReverseIterator16 advice: you must call hasNext() // before calling next() to insure there are contents. type runReverseIterator16 struct { rc *runContainer16 curIndex int64 // index into rc.iv curPosInIndex uint16 // offset in rc.iv[curIndex] } // newRunReverseIterator16 returns a new empty run iterator. func (rc *runContainer16) newRunReverseIterator16() *runReverseIterator16 { index := int64(len(rc.iv)) - 1 pos := uint16(0) if index >= 0 { pos = rc.iv[index].length } return &runReverseIterator16{ rc: rc, curIndex: index, curPosInIndex: pos, } } // hasNext returns false if calling next will panic. It // returns true when there is at least one more value // available in the iteration sequence. 
func (ri *runReverseIterator16) hasNext() bool { return ri.curIndex > 0 || ri.curIndex == 0 && ri.curPosInIndex >= 0 } // next returns the next value in the iteration sequence. func (ri *runReverseIterator16) next() uint16 { next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex if ri.curPosInIndex > 0 { ri.curPosInIndex-- } else { ri.curIndex-- if ri.curIndex >= 0 { ri.curPosInIndex = ri.rc.iv[ri.curIndex].length } } return next } func (rc *runContainer16) newManyRunIterator16() *runIterator16 { return rc.newRunIterator16() } // hs are the high bits to include to avoid needing to reiterate over the buffer in NextMany func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int { n := 0 if !ri.hasNext() { return n } // start and end are inclusive for n < len(buf) { moreVals := 0 if ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex { // add as many as you can from this seq moreVals = minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex)+1, len(buf)-n) base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex) | hs // allows BCE buf2 := buf[n : n+moreVals] for i := range buf2 { buf2[i] = base + uint32(i) } // update values n += moreVals } if moreVals+int(ri.curPosInIndex) > int(ri.rc.iv[ri.curIndex].length) { ri.curPosInIndex = 0 ri.curIndex++ if ri.curIndex == int64(len(ri.rc.iv)) { break } } else { ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16 } } return n } // remove removes key from the container. func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) { var index int64 index, wasPresent, _ = rc.search(int64(key), nil) if !wasPresent { return // already removed, nothing to do. } pos := key - rc.iv[index].start rc.deleteAt(&index, &pos) return } // internal helper functions func (rc *runContainer16) deleteAt(curIndex *int64, curPosInIndex *uint16) { rc.card-- ci := *curIndex pos := *curPosInIndex // are we first, last, or in the middle of our interval16? 
switch { case pos == 0: if int64(rc.iv[ci].length) == 0 { // our interval disappears rc.iv = append(rc.iv[:ci], rc.iv[ci+1:]...) // curIndex stays the same, since the delete did // the advance for us. *curPosInIndex = 0 } else { rc.iv[ci].start++ // no longer overflowable rc.iv[ci].length-- } case pos == rc.iv[ci].length: // length rc.iv[ci].length-- // our interval16 cannot disappear, else we would have been pos == 0, case first above. *curPosInIndex-- // if we leave *curIndex alone, then Next() will work properly even after the delete. default: //middle // split into two, adding an interval16 new0 := newInterval16Range(rc.iv[ci].start, rc.iv[ci].start+*curPosInIndex-1) new1start := int64(rc.iv[ci].start+*curPosInIndex) + 1 if new1start > int64(MaxUint16) { panic("overflow?!?!") } new1 := newInterval16Range(uint16(new1start), rc.iv[ci].last()) tail := append([]interval16{new0, new1}, rc.iv[ci+1:]...) rc.iv = append(rc.iv[:ci], tail...) // update curIndex and curPosInIndex *curIndex++ *curPosInIndex = 0 } } func have4Overlap16(astart, alast, bstart, blast int64) bool { if alast+1 <= bstart { return false } return blast+1 > astart } func intersectWithLeftover16(astart, alast, bstart, blast int64) (isOverlap, isLeftoverA, isLeftoverB bool, leftoverstart int64, intersection interval16) { if !have4Overlap16(astart, alast, bstart, blast) { return } isOverlap = true // do the intersection: if bstart > astart { intersection.start = uint16(bstart) } else { intersection.start = uint16(astart) } switch { case blast < alast: isLeftoverA = true leftoverstart = blast + 1 intersection.length = uint16(blast) - intersection.start case alast < blast: isLeftoverB = true leftoverstart = alast + 1 intersection.length = uint16(alast) - intersection.start default: // alast == blast intersection.length = uint16(alast) - intersection.start } return } func (rc *runContainer16) findNextIntervalThatIntersectsStartingFrom(startIndex int64, key int64) (index int64, done bool) { 
rc.myOpts.startIndex = startIndex rc.myOpts.endxIndex = 0 w, _, _ := rc.search(key, &rc.myOpts) // rc.search always returns w < len(rc.iv) if w < startIndex { // not found and comes before lower bound startIndex, // so just use the lower bound. if startIndex == int64(len(rc.iv)) { // also this bump up means that we are done return startIndex, true } return startIndex, false } return w, false } func sliceToString16(m []interval16) string { s := "" for i := range m { s += fmt.Sprintf("%v: %s, ", i, m[i]) } return s } // selectInt16 returns the j-th value in the container. // We panic of j is out of bounds. func (rc *runContainer16) selectInt16(j uint16) int { n := rc.cardinality() if int64(j) > n { panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n)) } var offset int64 for k := range rc.iv { nextOffset := offset + rc.iv[k].runlen() if nextOffset > int64(j) { return int(int64(rc.iv[k].start) + (int64(j) - offset)) } offset = nextOffset } panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n)) } // helper for invert func (rc *runContainer16) invertlastInterval(origin uint16, lastIdx int) []interval16 { cur := rc.iv[lastIdx] if cur.last() == MaxUint16 { if cur.start == origin { return nil // empty container } return []interval16{newInterval16Range(origin, cur.start-1)} } if cur.start == origin { return []interval16{newInterval16Range(cur.last()+1, MaxUint16)} } // invert splits return []interval16{ newInterval16Range(origin, cur.start-1), newInterval16Range(cur.last()+1, MaxUint16), } } // invert returns a new container (not inplace), that is // the inversion of rc. 
For each bit b in rc, the // returned value has !b func (rc *runContainer16) invert() *runContainer16 { ni := len(rc.iv) var m []interval16 switch ni { case 0: return &runContainer16{iv: []interval16{newInterval16Range(0, MaxUint16)}} case 1: return &runContainer16{iv: rc.invertlastInterval(0, 0)} } var invstart int64 ult := ni - 1 for i, cur := range rc.iv { if i == ult { // invertlastInteval will add both intervals (b) and (c) in // diagram below. m = append(m, rc.invertlastInterval(uint16(invstart), i)...) break } // INVAR: i and cur are not the last interval, there is a next at i+1 // // ........[cur.start, cur.last] ...... [next.start, next.last].... // ^ ^ ^ // (a) (b) (c) // // Now: we add interval (a); but if (a) is empty, for cur.start==0, we skip it. if cur.start > 0 { m = append(m, newInterval16Range(uint16(invstart), cur.start-1)) } invstart = int64(cur.last() + 1) } return &runContainer16{iv: m} } func (iv interval16) equal(b interval16) bool { return iv.start == b.start && iv.length == b.length } func (iv interval16) isSuperSetOf(b interval16) bool { return iv.start <= b.start && b.last() <= iv.last() } func (iv interval16) subtractInterval(del interval16) (left []interval16, delcount int64) { isect, isEmpty := intersectInterval16s(iv, del) if isEmpty { return nil, 0 } if del.isSuperSetOf(iv) { return nil, iv.runlen() } switch { case isect.start > iv.start && isect.last() < iv.last(): new0 := newInterval16Range(iv.start, isect.start-1) new1 := newInterval16Range(isect.last()+1, iv.last()) return []interval16{new0, new1}, isect.runlen() case isect.start == iv.start: return []interval16{newInterval16Range(isect.last()+1, iv.last())}, isect.runlen() default: return []interval16{newInterval16Range(iv.start, isect.start-1)}, isect.runlen() } } func (rc *runContainer16) isubtract(del interval16) { origiv := make([]interval16, len(rc.iv)) copy(origiv, rc.iv) n := int64(len(rc.iv)) if n == 0 { return // already done. 
} _, isEmpty := intersectInterval16s(newInterval16Range(rc.iv[0].start, rc.iv[n-1].last()), del) if isEmpty { return // done } // INVAR there is some intersection between rc and del istart, startAlready, _ := rc.search(int64(del.start), nil) ilast, lastAlready, _ := rc.search(int64(del.last()), nil) rc.card = -1 if istart == -1 { if ilast == n-1 && !lastAlready { rc.iv = nil return } } // some intervals will remain switch { case startAlready && lastAlready: res0, _ := rc.iv[istart].subtractInterval(del) // would overwrite values in iv b/c res0 can have len 2. so // write to origiv instead. lost := 1 + ilast - istart changeSize := int64(len(res0)) - lost newSize := int64(len(rc.iv)) + changeSize // rc.iv = append(pre, caboose...) // return if ilast != istart { res1, _ := rc.iv[ilast].subtractInterval(del) res0 = append(res0, res1...) changeSize = int64(len(res0)) - lost newSize = int64(len(rc.iv)) + changeSize } switch { case changeSize < 0: // shrink copy(rc.iv[istart+int64(len(res0)):], rc.iv[ilast+1:]) copy(rc.iv[istart:istart+int64(len(res0))], res0) rc.iv = rc.iv[:newSize] return case changeSize == 0: // stay the same copy(rc.iv[istart:istart+int64(len(res0))], res0) return default: // changeSize > 0 is only possible when ilast == istart. 
// Hence we now know: changeSize == 1 and len(res0) == 2 rc.iv = append(rc.iv, interval16{}) // len(rc.iv) is correct now, no need to rc.iv = rc.iv[:newSize] // copy the tail into place copy(rc.iv[ilast+2:], rc.iv[ilast+1:]) // copy the new item(s) into place copy(rc.iv[istart:istart+2], res0) return } case !startAlready && !lastAlready: // we get to discard whole intervals // from the search() definition: // if del.start is not present, then istart is // set as follows: // // a) istart == n-1 if del.start is beyond our // last interval16 in rc.iv; // // b) istart == -1 if del.start is before our first // interval16 in rc.iv; // // c) istart is set to the minimum index of rc.iv // which comes strictly before the del.start; // so del.start > rc.iv[istart].last, // and if istart+1 exists, then del.start < rc.iv[istart+1].startx // if del.last is not present, then ilast is // set as follows: // // a) ilast == n-1 if del.last is beyond our // last interval16 in rc.iv; // // b) ilast == -1 if del.last is before our first // interval16 in rc.iv; // // c) ilast is set to the minimum index of rc.iv // which comes strictly before the del.last; // so del.last > rc.iv[ilast].last, // and if ilast+1 exists, then del.last < rc.iv[ilast+1].start // INVAR: istart >= 0 pre := rc.iv[:istart+1] if ilast == n-1 { rc.iv = pre return } // INVAR: ilast < n-1 lost := ilast - istart changeSize := -lost newSize := int64(len(rc.iv)) + changeSize if changeSize != 0 { copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) } rc.iv = rc.iv[:newSize] return case startAlready && !lastAlready: // we can only shrink or stay the same size // i.e. we either eliminate the whole interval, // or just cut off the right side. 
res0, _ := rc.iv[istart].subtractInterval(del) if len(res0) > 0 { // len(res) must be 1 rc.iv[istart] = res0[0] } lost := 1 + (ilast - istart) changeSize := int64(len(res0)) - lost newSize := int64(len(rc.iv)) + changeSize if changeSize != 0 { copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) } rc.iv = rc.iv[:newSize] return case !startAlready && lastAlready: // we can only shrink or stay the same size res1, _ := rc.iv[ilast].subtractInterval(del) lost := ilast - istart changeSize := int64(len(res1)) - lost newSize := int64(len(rc.iv)) + changeSize if changeSize != 0 { // move the tail first to make room for res1 copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) } copy(rc.iv[istart+1:], res1) rc.iv = rc.iv[:newSize] return } } // compute rc minus b, and return the result as a new value (not inplace). // port of run_container_andnot from CRoaring... // https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496 func (rc *runContainer16) AndNotRunContainer16(b *runContainer16) *runContainer16 { if len(b.iv) == 0 || len(rc.iv) == 0 { return rc } dst := newRunContainer16() apos := 0 bpos := 0 a := rc astart := a.iv[apos].start alast := a.iv[apos].last() bstart := b.iv[bpos].start blast := b.iv[bpos].last() alen := len(a.iv) blen := len(b.iv) for apos < alen && bpos < blen { switch { case alast < bstart: // output the first run dst.iv = append(dst.iv, newInterval16Range(astart, alast)) apos++ if apos < alen { astart = a.iv[apos].start alast = a.iv[apos].last() } case blast < astart: // exit the second run bpos++ if bpos < blen { bstart = b.iv[bpos].start blast = b.iv[bpos].last() } default: // a: [ ] // b: [ ] // alast >= bstart // blast >= astart if astart < bstart { dst.iv = append(dst.iv, newInterval16Range(astart, bstart-1)) } if alast > blast { astart = blast + 1 } else { apos++ if apos < alen { astart = a.iv[apos].start alast = a.iv[apos].last() } } } } if apos < alen { dst.iv = append(dst.iv, newInterval16Range(astart, alast)) apos++ if 
apos < alen { dst.iv = append(dst.iv, a.iv[apos:]...) } } return dst } func (rc *runContainer16) numberOfRuns() (nr int) { return len(rc.iv) } func (rc *runContainer16) containerType() contype { return run16Contype } func (rc *runContainer16) equals16(srb *runContainer16) bool { // Check if the containers are the same object. if rc == srb { return true } if len(srb.iv) != len(rc.iv) { return false } for i, v := range rc.iv { if v != srb.iv[i] { return false } } return true } // compile time verify we meet interface requirements var _ container = &runContainer16{} func (rc *runContainer16) clone() container { return newRunContainer16CopyIv(rc.iv) } func (rc *runContainer16) minimum() uint16 { return rc.iv[0].start // assume not empty } func (rc *runContainer16) maximum() uint16 { return rc.iv[len(rc.iv)-1].last() // assume not empty } func (rc *runContainer16) isFull() bool { return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16)) } func (rc *runContainer16) and(a container) container { if rc.isFull() { return a.clone() } switch c := a.(type) { case *runContainer16: return rc.intersect(c) case *arrayContainer: return rc.andArray(c) case *bitmapContainer: return rc.andBitmapContainer(c) } panic("unsupported container type") } func (rc *runContainer16) andCardinality(a container) int { switch c := a.(type) { case *runContainer16: return int(rc.intersectCardinality(c)) case *arrayContainer: return rc.andArrayCardinality(c) case *bitmapContainer: return rc.andBitmapContainerCardinality(c) } panic("unsupported container type") } // andBitmapContainer finds the intersection of rc and b. 
// andBitmapContainer intersects rc with bc by first converting rc to a
// bitmap, then delegating to the bitmap AND.
func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container {
	bc2 := newBitmapContainerFromRun(rc)
	return bc2.andBitmap(bc)
}

// andArrayCardinality counts |rc AND ac| with a single merge-style scan
// over the runs and the sorted array content, without materializing
// the intersection.
func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int {
	pos := 0
	answer := 0
	maxpos := ac.getCardinality()
	if maxpos == 0 {
		return 0 // won't happen in actual code
	}
	v := ac.content[pos]
mainloop:
	for _, p := range rc.iv {
		// skip array values below the current run
		for v < p.start {
			pos++
			if pos == maxpos {
				break mainloop
			}
			v = ac.content[pos]
		}
		// count array values inside the current run
		for v <= p.last() {
			answer++
			pos++
			if pos == maxpos {
				break mainloop
			}
			v = ac.content[pos]
		}
	}
	return answer
}

// iand computes the in-place intersection of rc and a, dispatching by
// concrete container type. When rc is full the intersection is a itself.
func (rc *runContainer16) iand(a container) container {
	if rc.isFull() {
		return a.clone()
	}
	switch c := a.(type) {
	case *runContainer16:
		return rc.inplaceIntersect(c)
	case *arrayContainer:
		return rc.andArray(c)
	case *bitmapContainer:
		return rc.iandBitmapContainer(c)
	}
	panic("unsupported container type")
}

// inplaceIntersect replaces rc's contents with rc AND rc2.
func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container {
	// TODO: optimize by doing less allocation, possibly?
	// sect will be new
	sect := rc.intersect(rc2)
	*rc = *sect
	return rc
}

// iandBitmapContainer replaces rc's contents with rc AND bc.
func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container {
	isect := rc.andBitmapContainer(bc)
	*rc = *newRunContainer16FromContainer(isect)
	return rc
}

// andArray intersects rc with ac, producing an array container (the
// result can never have more values than ac).
func (rc *runContainer16) andArray(ac *arrayContainer) container {
	if len(rc.iv) == 0 {
		return newArrayContainer()
	}

	acCardinality := ac.getCardinality()
	c := newArrayContainerCapacity(acCardinality)

	for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; {
		iv := rc.iv[rlePos]
		arrayVal := ac.content[arrayPos]

		// advance past runs that end before the current array value
		for iv.last() < arrayVal {
			rlePos++
			if rlePos == len(rc.iv) {
				return c
			}
			iv = rc.iv[rlePos]
		}

		if iv.start > arrayVal {
			// jump the array pointer forward to the start of the run
			arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start)
		} else {
			// arrayVal falls inside the run: keep it
			c.content = append(c.content, arrayVal)
			arrayPos++
		}
	}
	return c
}

// andNot computes rc minus a, dispatching by concrete container type.
func (rc *runContainer16) andNot(a container) container {
	switch c := a.(type) {
	case *arrayContainer:
		return rc.andNotArray(c)
	case *bitmapContainer:
		return rc.andNotBitmap(c)
	case *runContainer16:
		return rc.andNotRunContainer16(c)
	}
	panic("unsupported container type")
}

// fillLeastSignificant16bits writes every value of rc into x starting at
// offset i, OR-ing in mask (the container's high bits).
func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) {
	k := 0
	var val int64
	for _, p := range rc.iv {
		n := p.runlen()
		for j := int64(0); j < n; j++ {
			val = int64(p.start) + j
			x[k+i] = uint32(val) | mask
			k++
		}
	}
}

func (rc *runContainer16) getShortIterator() shortPeekable {
	return rc.newRunIterator16()
}

func (rc *runContainer16) getReverseIterator() shortIterable {
	return rc.newRunReverseIterator16()
}

func (rc *runContainer16) getManyIterator() manyIterable {
	return rc.newManyRunIterator16()
}

// add the values in the range [firstOfRange, endx). endx
// is still able to express 2^16 because it is an int not an uint16.
func (rc *runContainer16) iaddRange(firstOfRange, endx int) container { if firstOfRange >= endx { panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx)) } addme := newRunContainer16TakeOwnership([]interval16{ { start: uint16(firstOfRange), length: uint16(endx - 1 - firstOfRange), }, }) *rc = *rc.union(addme) return rc } // remove the values in the range [firstOfRange,endx) func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container { if firstOfRange >= endx { panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+ " nothing to do.", firstOfRange, endx)) //return rc } x := newInterval16Range(uint16(firstOfRange), uint16(endx-1)) rc.isubtract(x) return rc } // not flip the values in the range [firstOfRange,endx) func (rc *runContainer16) not(firstOfRange, endx int) container { if firstOfRange >= endx { panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) } return rc.Not(firstOfRange, endx) } // Not flips the values in the range [firstOfRange,endx). // This is not inplace. Only the returned value has the flipped bits. // // Currently implemented as (!A intersect B) union (A minus B), // where A is rc, and B is the supplied [firstOfRange, endx) interval. // // TODO(time optimization): convert this to a single pass // algorithm by copying AndNotRunContainer16() and modifying it. // Current routine is correct but // makes 2 more passes through the arrays than should be // strictly necessary. Measure both ways though--this may not matter. 
// func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 { if firstOfRange >= endx { panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange)) } if firstOfRange >= endx { return rc.Clone() } a := rc // algo: // (!A intersect B) union (A minus B) nota := a.invert() bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))} b := newRunContainer16TakeOwnership(bs) notAintersectB := nota.intersect(b) aMinusB := a.AndNotRunContainer16(b) rc2 := notAintersectB.union(aMinusB) return rc2 } // equals is now logical equals; it does not require the // same underlying container type. func (rc *runContainer16) equals(o container) bool { srb, ok := o.(*runContainer16) if !ok { // maybe value instead of pointer val, valok := o.(*runContainer16) if valok { srb = val ok = true } } if ok { // Check if the containers are the same object. if rc == srb { return true } if len(srb.iv) != len(rc.iv) { return false } for i, v := range rc.iv { if v != srb.iv[i] { return false } } return true } // use generic comparison if o.getCardinality() != rc.getCardinality() { return false } rit := rc.getShortIterator() bit := o.getShortIterator() //k := 0 for rit.hasNext() { if bit.next() != rit.next() { return false } //k++ } return true } func (rc *runContainer16) iaddReturnMinimized(x uint16) container { rc.Add(x) return rc } func (rc *runContainer16) iadd(x uint16) (wasNew bool) { return rc.Add(x) } func (rc *runContainer16) iremoveReturnMinimized(x uint16) container { rc.removeKey(x) return rc } func (rc *runContainer16) iremove(x uint16) bool { return rc.removeKey(x) } func (rc *runContainer16) or(a container) container { if rc.isFull() { return rc.clone() } switch c := a.(type) { case *runContainer16: return rc.union(c) case *arrayContainer: return rc.orArray(c) case *bitmapContainer: return rc.orBitmapContainer(c) } panic("unsupported container type") } func (rc *runContainer16) orCardinality(a container) int { switch c := a.(type) { 
case *runContainer16: return int(rc.unionCardinality(c)) case *arrayContainer: return rc.orArrayCardinality(c) case *bitmapContainer: return rc.orBitmapContainerCardinality(c) } panic("unsupported container type") } // orBitmapContainer finds the union of rc and bc. func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container { bc2 := newBitmapContainerFromRun(rc) return bc2.iorBitmap(bc) } func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int { answer := 0 for i := range rc.iv { answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1) } //bc.computeCardinality() return answer } func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int { return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc) } // orArray finds the union of rc and ac. func (rc *runContainer16) orArray(ac *arrayContainer) container { bc1 := newBitmapContainerFromRun(rc) bc2 := ac.toBitmapContainer() return bc1.orBitmap(bc2) } // orArray finds the union of rc and ac. 
// orArrayCardinality returns |rc OR ac| via inclusion-exclusion.
func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int {
	return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac)
}

// ior computes the in-place union of rc and a, dispatching by container
// type; a full container already contains everything.
func (rc *runContainer16) ior(a container) container {
	if rc.isFull() {
		return rc
	}
	switch c := a.(type) {
	case *runContainer16:
		return rc.inplaceUnion(c)
	case *arrayContainer:
		return rc.iorArray(c)
	case *bitmapContainer:
		return rc.iorBitmapContainer(c)
	}
	panic("unsupported container type")
}

// inplaceUnion adds every value of rc2 to rc, one value at a time.
func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
	for _, p := range rc2.iv {
		last := int64(p.last())
		for i := int64(p.start); i <= last; i++ {
			rc.Add(uint16(i))
		}
	}
	return rc
}

// iorBitmapContainer adds every value of bc to rc via bc's iterator.
func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
	it := bc.getShortIterator()
	for it.hasNext() {
		rc.Add(it.next())
	}
	return rc
}

// iorArray adds every value of ac to rc via ac's iterator.
func (rc *runContainer16) iorArray(ac *arrayContainer) container {
	it := ac.getShortIterator()
	for it.hasNext() {
		rc.Add(it.next())
	}
	return rc
}

// lazyIOR is described (not yet implemented) in
// this nice note from @lemire on
// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737
//
// Description of lazyOR and lazyIOR from @lemire:
//
// Lazy functions are optional and can be simply
// wrapper around non-lazy functions.
//
// The idea of "laziness" is as follows. It is
// inspired by the concept of lazy evaluation
// you might be familiar with (functional programming
// and all that). So a roaring bitmap is
// such that all its containers are, in some
// sense, chosen to use as little memory as
// possible. This is nice. Also, all bitsets
// are "cardinality aware" so that you can do
// fast rank/select queries, or query the
// cardinality of the whole bitmap... very fast,
// without latency.
//
// However, imagine that you are aggregating 100
// bitmaps together. So you OR the first two, then OR
// that with the third one and so forth. Clearly,
// intermediate bitmaps don't need to be as
// compressed as possible, right? They can be
// in a "dirty state". You only need the end
// result to be in a nice state... which you
// can achieve by calling repairAfterLazy at the end.
//
// The Java/C code does something special for
// the in-place lazy OR runs. The idea is that
// instead of taking two run containers and
// generating a new one, we actually try to
// do the computation in-place through a
// technique invented by @gssiyankai (pinging him!).
// What you do is you check whether the host
// run container has lots of extra capacity.
// If it does, you move its data at the end of
// the backing array, and then you write
// the answer at the beginning. What this
// trick does is minimize memory allocations.
//
func (rc *runContainer16) lazyIOR(a container) container {
	// not lazy at the moment
	return rc.ior(a)
}

// lazyOR is described above in lazyIOR.
func (rc *runContainer16) lazyOR(a container) container {
	// not lazy at the moment
	return rc.or(a)
}

// intersects reports whether rc and a share at least one value.
func (rc *runContainer16) intersects(a container) bool {
	// TODO: optimize by doing inplace/less allocation, possibly?
	isect := rc.and(a)
	return isect.getCardinality() > 0
}

// xor computes the symmetric difference of rc and a, dispatching by
// container type.
func (rc *runContainer16) xor(a container) container {
	switch c := a.(type) {
	case *arrayContainer:
		return rc.xorArray(c)
	case *bitmapContainer:
		return rc.xorBitmap(c)
	case *runContainer16:
		return rc.xorRunContainer16(c)
	}
	panic("unsupported container type")
}

// iandNot computes rc minus a in place, dispatching by container type.
func (rc *runContainer16) iandNot(a container) container {
	switch c := a.(type) {
	case *arrayContainer:
		return rc.iandNotArray(c)
	case *bitmapContainer:
		return rc.iandNotBitmap(c)
	case *runContainer16:
		return rc.iandNotRunContainer16(c)
	}
	panic("unsupported container type")
}

// flip the values in the range [firstOfRange,endx)
func (rc *runContainer16) inot(firstOfRange, endx int) container {
	if firstOfRange >= endx {
		panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
	}
	// TODO: minimize copies, do it all inplace; not() makes a copy.
	rc = rc.Not(firstOfRange, endx)
	return rc
}

func (rc *runContainer16) getCardinality() int {
	return int(rc.cardinality())
}

// rank returns the number of values in rc that are <= x.
func (rc *runContainer16) rank(x uint16) int {
	n := int64(len(rc.iv))
	xx := int64(x)
	w, already, _ := rc.search(xx, nil)
	if w < 0 {
		// x is before the first interval: nothing counted
		return 0
	}
	if !already && w == n-1 {
		// x is past the last interval: everything counted
		return rc.getCardinality()
	}
	var rnk int64
	if !already {
		// x falls in the gap after interval w: count w inclusive
		for i := int64(0); i <= w; i++ {
			rnk += rc.iv[i].runlen()
		}
		return int(rnk)
	}
	// x is inside interval w: count preceding runs plus the prefix of w
	for i := int64(0); i < w; i++ {
		rnk += rc.iv[i].runlen()
	}
	rnk += int64(x-rc.iv[w].start) + 1
	return int(rnk)
}

func (rc *runContainer16) selectInt(x uint16) int {
	return rc.selectInt16(x)
}

func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container {
	return rc.AndNotRunContainer16(b)
}

// andNotArray computes rc minus ac by going through bitmap form.
func (rc *runContainer16) andNotArray(ac *arrayContainer) container {
	rcb := rc.toBitmapContainer()
	acb := ac.toBitmapContainer()
	return rcb.andNotBitmap(acb)
}

// andNotBitmap computes rc minus bc by going through bitmap form.
func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container {
	rcb := rc.toBitmapContainer()
	return rcb.andNotBitmap(bc)
}

// toBitmapContainer converts rc to an equivalent bitmap container with
// its cardinality freshly computed.
func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
	bc := newBitmapContainer()
	for i := range rc.iv {
		bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
	}
	bc.computeCardinality()
	return bc
}

// iandNotRunContainer16 computes rc minus x2 "in place" (via bitmaps).
func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container {
	rcb := rc.toBitmapContainer()
	x2b := x2.toBitmapContainer()
	rcb.iandNotBitmapSurely(x2b)
	// TODO: check size and optimize the return value
	// TODO: is inplace modification really required? If not, elide the copy.
	rc2 := newRunContainer16FromBitmapContainer(rcb)
	*rc = *rc2
	return rc
}

// iandNotArray computes rc minus ac "in place" (via bitmaps).
func (rc *runContainer16) iandNotArray(ac *arrayContainer) container {
	rcb := rc.toBitmapContainer()
	acb := ac.toBitmapContainer()
	rcb.iandNotBitmapSurely(acb)
	// TODO: check size and optimize the return value
	// TODO: is inplace modification really required? If not, elide the copy.
	rc2 := newRunContainer16FromBitmapContainer(rcb)
	*rc = *rc2
	return rc
}

// iandNotBitmap computes rc minus bc "in place" (via bitmaps).
func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container {
	rcb := rc.toBitmapContainer()
	rcb.iandNotBitmapSurely(bc)
	// TODO: check size and optimize the return value
	// TODO: is inplace modification really required? If not, elide the copy.
	rc2 := newRunContainer16FromBitmapContainer(rcb)
	*rc = *rc2
	return rc
}

// xorRunContainer16 computes rc XOR x2 by going through bitmap form.
func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container {
	rcb := rc.toBitmapContainer()
	x2b := x2.toBitmapContainer()
	return rcb.xorBitmap(x2b)
}

// xorArray computes rc XOR ac by going through bitmap form.
func (rc *runContainer16) xorArray(ac *arrayContainer) container {
	rcb := rc.toBitmapContainer()
	acb := ac.toBitmapContainer()
	return rcb.xorBitmap(acb)
}

// xorBitmap computes rc XOR bc by going through bitmap form.
func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container {
	rcb := rc.toBitmapContainer()
	return rcb.xorBitmap(bc)
}

// convert to bitmap or array *if needed*: picks whichever representation
// would serialize smallest.
func (rc *runContainer16) toEfficientContainer() container {
	// runContainer16SerializedSizeInBytes(numRuns)
	sizeAsRunContainer := rc.getSizeInBytes()
	sizeAsBitmapContainer := bitmapContainerSizeInBytes()
	card := int(rc.cardinality())
	sizeAsArrayContainer := arrayContainerSizeInBytes(card)
	if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
		return rc
	}
	if card <= arrayDefaultMaxSize {
		return rc.toArrayContainer()
	}
	bc := newBitmapContainerFromRun(rc)
	return bc
}

// toArrayContainer converts rc to an equivalent array container.
func (rc *runContainer16) toArrayContainer() *arrayContainer {
	ac := newArrayContainer()
	for i := range rc.iv {
		ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
	}
	return ac
}

// newRunContainer16FromContainer builds a run container equivalent to c.
func newRunContainer16FromContainer(c container) *runContainer16 {
	switch x := c.(type) {
	case *runContainer16:
		return x.Clone()
	case *arrayContainer:
		return newRunContainer16FromArray(x)
	case *bitmapContainer:
		return newRunContainer16FromBitmapContainer(x)
	}
	panic("unsupported container type")
}

// And finds the intersection of rc and b.
func (rc *runContainer16) And(b *Bitmap) *Bitmap { out := NewBitmap() for _, p := range rc.iv { plast := p.last() for i := p.start; i <= plast; i++ { if b.Contains(uint32(i)) { out.Add(uint32(i)) } } } return out } // Xor returns the exclusive-or of rc and b. func (rc *runContainer16) Xor(b *Bitmap) *Bitmap { out := b.Clone() for _, p := range rc.iv { plast := p.last() for v := p.start; v <= plast; v++ { w := uint32(v) if out.Contains(w) { out.RemoveRange(uint64(w), uint64(w+1)) } else { out.Add(w) } } } return out } // Or returns the union of rc and b. func (rc *runContainer16) Or(b *Bitmap) *Bitmap { out := b.Clone() for _, p := range rc.iv { plast := p.last() for v := p.start; v <= plast; v++ { out.Add(uint32(v)) } } return out } // serializedSizeInBytes returns the number of bytes of memory // required by this runContainer16. This is for the // Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/ func (rc *runContainer16) serializedSizeInBytes() int { // number of runs in one uint16, then each run // needs two more uint16 return 2 + len(rc.iv)*4 } func (rc *runContainer16) addOffset(x uint16) []container { low := newRunContainer16() high := newRunContainer16() for _, iv := range rc.iv { val := int(iv.start) + int(x) finalVal := int(val) + int(iv.length) if val <= 0xffff { if finalVal <= 0xffff { low.iv = append(low.iv, interval16{uint16(val), iv.length}) } else { low.iv = append(low.iv, interval16{uint16(val), uint16(0xffff - val)}) high.iv = append(high.iv, interval16{uint16(0), uint16(finalVal & 0xffff)}) } } else { high.iv = append(high.iv, interval16{uint16(val & 0xffff), iv.length}) } } return []container{low, high} } roaring-0.4.21/runcontainer_gen.go 0000664 0000000 0000000 00000056040 13542657257 0017153 0 ustar 00root root 0000000 0000000 package roaring // NOTE: THIS FILE WAS PRODUCED BY THE // MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) // DO NOT EDIT import "github.com/tinylib/msgp/msgp" // Deprecated: DecodeMsg 
implements msgp.Decodable func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field var zbai uint32 zbai, err = dc.ReadMapHeader() if err != nil { return } for zbai > 0 { zbai-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "runstart": z.runstart, err = dc.ReadUint16() if err != nil { return } case "runlen": z.runlen, err = dc.ReadUint16() if err != nil { return } case "actuallyAdded": z.actuallyAdded, err = dc.ReadUint16() if err != nil { return } case "m": var zcmr uint32 zcmr, err = dc.ReadArrayHeader() if err != nil { return } if cap(z.m) >= int(zcmr) { z.m = (z.m)[:zcmr] } else { z.m = make([]interval16, zcmr) } for zxvk := range z.m { var zajw uint32 zajw, err = dc.ReadMapHeader() if err != nil { return } for zajw > 0 { zajw-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "start": z.m[zxvk].start, err = dc.ReadUint16() if err != nil { return } case "last": z.m[zxvk].length, err = dc.ReadUint16() z.m[zxvk].length -= z.m[zxvk].start if err != nil { return } default: err = dc.Skip() if err != nil { return } } } } case "rc": if dc.IsNil() { err = dc.ReadNil() if err != nil { return } z.rc = nil } else { if z.rc == nil { z.rc = new(runContainer16) } var zwht uint32 zwht, err = dc.ReadMapHeader() if err != nil { return } for zwht > 0 { zwht-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "iv": var zhct uint32 zhct, err = dc.ReadArrayHeader() if err != nil { return } if cap(z.rc.iv) >= int(zhct) { z.rc.iv = (z.rc.iv)[:zhct] } else { z.rc.iv = make([]interval16, zhct) } for zbzg := range z.rc.iv { var zcua uint32 zcua, err = dc.ReadMapHeader() if err != nil { return } for zcua > 0 { zcua-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "start": z.rc.iv[zbzg].start, err = dc.ReadUint16() if err != nil { return } case "last": 
z.rc.iv[zbzg].length, err = dc.ReadUint16() z.rc.iv[zbzg].length -= z.rc.iv[zbzg].start if err != nil { return } default: err = dc.Skip() if err != nil { return } } } } case "card": z.rc.card, err = dc.ReadInt64() if err != nil { return } default: err = dc.Skip() if err != nil { return } } } } default: err = dc.Skip() if err != nil { return } } } return } // Deprecated: EncodeMsg implements msgp.Encodable func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 5 // write "runstart" err = en.Append(0x85, 0xa8, 0x72, 0x75, 0x6e, 0x73, 0x74, 0x61, 0x72, 0x74) if err != nil { return err } err = en.WriteUint16(z.runstart) if err != nil { return } // write "runlen" err = en.Append(0xa6, 0x72, 0x75, 0x6e, 0x6c, 0x65, 0x6e) if err != nil { return err } err = en.WriteUint16(z.runlen) if err != nil { return } // write "actuallyAdded" err = en.Append(0xad, 0x61, 0x63, 0x74, 0x75, 0x61, 0x6c, 0x6c, 0x79, 0x41, 0x64, 0x64, 0x65, 0x64) if err != nil { return err } err = en.WriteUint16(z.actuallyAdded) if err != nil { return } // write "m" err = en.Append(0xa1, 0x6d) if err != nil { return err } err = en.WriteArrayHeader(uint32(len(z.m))) if err != nil { return } for zxvk := range z.m { // map header, size 2 // write "start" err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) if err != nil { return err } err = en.WriteUint16(z.m[zxvk].start) if err != nil { return } // write "last" err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) if err != nil { return err } err = en.WriteUint16(z.m[zxvk].last()) if err != nil { return } } // write "rc" err = en.Append(0xa2, 0x72, 0x63) if err != nil { return err } if z.rc == nil { err = en.WriteNil() if err != nil { return } } else { // map header, size 2 // write "iv" err = en.Append(0x82, 0xa2, 0x69, 0x76) if err != nil { return err } err = en.WriteArrayHeader(uint32(len(z.rc.iv))) if err != nil { return } for zbzg := range z.rc.iv { // map header, size 2 // write "start" err = en.Append(0x82, 0xa5, 0x73, 0x74, 
0x61, 0x72, 0x74) if err != nil { return err } err = en.WriteUint16(z.rc.iv[zbzg].start) if err != nil { return } // write "last" err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) if err != nil { return err } err = en.WriteUint16(z.rc.iv[zbzg].last()) if err != nil { return } } // write "card" err = en.Append(0xa4, 0x63, 0x61, 0x72, 0x64) if err != nil { return err } err = en.WriteInt64(z.rc.card) if err != nil { return } } return } // Deprecated: MarshalMsg implements msgp.Marshaler func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 5 // string "runstart" o = append(o, 0x85, 0xa8, 0x72, 0x75, 0x6e, 0x73, 0x74, 0x61, 0x72, 0x74) o = msgp.AppendUint16(o, z.runstart) // string "runlen" o = append(o, 0xa6, 0x72, 0x75, 0x6e, 0x6c, 0x65, 0x6e) o = msgp.AppendUint16(o, z.runlen) // string "actuallyAdded" o = append(o, 0xad, 0x61, 0x63, 0x74, 0x75, 0x61, 0x6c, 0x6c, 0x79, 0x41, 0x64, 0x64, 0x65, 0x64) o = msgp.AppendUint16(o, z.actuallyAdded) // string "m" o = append(o, 0xa1, 0x6d) o = msgp.AppendArrayHeader(o, uint32(len(z.m))) for zxvk := range z.m { // map header, size 2 // string "start" o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) o = msgp.AppendUint16(o, z.m[zxvk].start) // string "last" o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) o = msgp.AppendUint16(o, z.m[zxvk].last()) } // string "rc" o = append(o, 0xa2, 0x72, 0x63) if z.rc == nil { o = msgp.AppendNil(o) } else { // map header, size 2 // string "iv" o = append(o, 0x82, 0xa2, 0x69, 0x76) o = msgp.AppendArrayHeader(o, uint32(len(z.rc.iv))) for zbzg := range z.rc.iv { // map header, size 2 // string "start" o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) o = msgp.AppendUint16(o, z.rc.iv[zbzg].start) // string "last" o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) o = msgp.AppendUint16(o, z.rc.iv[zbzg].last()) } // string "card" o = append(o, 0xa4, 0x63, 0x61, 0x72, 0x64) o = msgp.AppendInt64(o, z.rc.card) } return } // Deprecated: 
UnmarshalMsg implements msgp.Unmarshaler func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field var zxhx uint32 zxhx, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zxhx > 0 { zxhx-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "runstart": z.runstart, bts, err = msgp.ReadUint16Bytes(bts) if err != nil { return } case "runlen": z.runlen, bts, err = msgp.ReadUint16Bytes(bts) if err != nil { return } case "actuallyAdded": z.actuallyAdded, bts, err = msgp.ReadUint16Bytes(bts) if err != nil { return } case "m": var zlqf uint32 zlqf, bts, err = msgp.ReadArrayHeaderBytes(bts) if err != nil { return } if cap(z.m) >= int(zlqf) { z.m = (z.m)[:zlqf] } else { z.m = make([]interval16, zlqf) } for zxvk := range z.m { var zdaf uint32 zdaf, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zdaf > 0 { zdaf-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "start": z.m[zxvk].start, bts, err = msgp.ReadUint16Bytes(bts) if err != nil { return } case "last": z.m[zxvk].length, bts, err = msgp.ReadUint16Bytes(bts) z.m[zxvk].length -= z.m[zxvk].start if err != nil { return } default: bts, err = msgp.Skip(bts) if err != nil { return } } } } case "rc": if msgp.IsNil(bts) { bts, err = msgp.ReadNilBytes(bts) if err != nil { return } z.rc = nil } else { if z.rc == nil { z.rc = new(runContainer16) } var zpks uint32 zpks, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zpks > 0 { zpks-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "iv": var zjfb uint32 zjfb, bts, err = msgp.ReadArrayHeaderBytes(bts) if err != nil { return } if cap(z.rc.iv) >= int(zjfb) { z.rc.iv = (z.rc.iv)[:zjfb] } else { z.rc.iv = make([]interval16, zjfb) } for zbzg := range z.rc.iv { var zcxo uint32 zcxo, bts, err = msgp.ReadMapHeaderBytes(bts) if 
err != nil { return } for zcxo > 0 { zcxo-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "start": z.rc.iv[zbzg].start, bts, err = msgp.ReadUint16Bytes(bts) if err != nil { return } case "last": z.rc.iv[zbzg].length, bts, err = msgp.ReadUint16Bytes(bts) z.rc.iv[zbzg].length -= z.rc.iv[zbzg].start if err != nil { return } default: bts, err = msgp.Skip(bts) if err != nil { return } } } } case "card": z.rc.card, bts, err = msgp.ReadInt64Bytes(bts) if err != nil { return } default: bts, err = msgp.Skip(bts) if err != nil { return } } } } default: bts, err = msgp.Skip(bts) if err != nil { return } } } o = bts return } // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *addHelper16) Msgsize() (s int) { s = 1 + 9 + msgp.Uint16Size + 7 + msgp.Uint16Size + 14 + msgp.Uint16Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 3 if z.rc == nil { s += msgp.NilSize } else { s += 1 + 3 + msgp.ArrayHeaderSize + (len(z.rc.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size } return } // Deprecated: DecodeMsg implements msgp.Decodable func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field var zeff uint32 zeff, err = dc.ReadMapHeader() if err != nil { return } for zeff > 0 { zeff-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "start": z.start, err = dc.ReadUint16() if err != nil { return } case "last": z.length, err = dc.ReadUint16() z.length = -z.start if err != nil { return } default: err = dc.Skip() if err != nil { return } } } return } // Deprecated: EncodeMsg implements msgp.Encodable func (z interval16) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 2 // write "start" err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) if err != nil { return err } err = en.WriteUint16(z.start) if err != nil { 
return } // write "last" err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) if err != nil { return err } err = en.WriteUint16(z.last()) if err != nil { return } return } // Deprecated: MarshalMsg implements msgp.Marshaler func (z interval16) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 2 // string "start" o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) o = msgp.AppendUint16(o, z.start) // string "last" o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) o = msgp.AppendUint16(o, z.last()) return } // Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field var zrsw uint32 zrsw, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zrsw > 0 { zrsw-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "start": z.start, bts, err = msgp.ReadUint16Bytes(bts) if err != nil { return } case "last": z.length, bts, err = msgp.ReadUint16Bytes(bts) z.length -= z.start if err != nil { return } default: bts, err = msgp.Skip(bts) if err != nil { return } } } o = bts return } // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z interval16) Msgsize() (s int) { s = 1 + 6 + msgp.Uint16Size + 5 + msgp.Uint16Size return } // Deprecated: DecodeMsg implements msgp.Decodable func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field var zdnj uint32 zdnj, err = dc.ReadMapHeader() if err != nil { return } for zdnj > 0 { zdnj-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "iv": var zobc uint32 zobc, err = dc.ReadArrayHeader() if err != nil { return } if cap(z.iv) >= int(zobc) { z.iv = (z.iv)[:zobc] } else { z.iv = make([]interval16, zobc) } for zxpk := range z.iv { var zsnv uint32 zsnv, err = dc.ReadMapHeader() if err != nil { return } 
for zsnv > 0 { zsnv-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "start": z.iv[zxpk].start, err = dc.ReadUint16() if err != nil { return } case "last": z.iv[zxpk].length, err = dc.ReadUint16() z.iv[zxpk].length -= z.iv[zxpk].start if err != nil { return } default: err = dc.Skip() if err != nil { return } } } } case "card": z.card, err = dc.ReadInt64() if err != nil { return } default: err = dc.Skip() if err != nil { return } } } return } // Deprecated: EncodeMsg implements msgp.Encodable func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 2 // write "iv" err = en.Append(0x82, 0xa2, 0x69, 0x76) if err != nil { return err } err = en.WriteArrayHeader(uint32(len(z.iv))) if err != nil { return } for zxpk := range z.iv { // map header, size 2 // write "start" err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) if err != nil { return err } err = en.WriteUint16(z.iv[zxpk].start) if err != nil { return } // write "last" err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) if err != nil { return err } err = en.WriteUint16(z.iv[zxpk].last()) if err != nil { return } } // write "card" err = en.Append(0xa4, 0x63, 0x61, 0x72, 0x64) if err != nil { return err } err = en.WriteInt64(z.card) if err != nil { return } return } // Deprecated: MarshalMsg implements msgp.Marshaler func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 2 // string "iv" o = append(o, 0x82, 0xa2, 0x69, 0x76) o = msgp.AppendArrayHeader(o, uint32(len(z.iv))) for zxpk := range z.iv { // map header, size 2 // string "start" o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) o = msgp.AppendUint16(o, z.iv[zxpk].start) // string "last" o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) o = msgp.AppendUint16(o, z.iv[zxpk].last()) } // string "card" o = append(o, 0xa4, 0x63, 0x61, 0x72, 0x64) o = msgp.AppendInt64(o, z.card) return } // Deprecated: UnmarshalMsg 
implements msgp.Unmarshaler func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field var zkgt uint32 zkgt, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zkgt > 0 { zkgt-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "iv": var zema uint32 zema, bts, err = msgp.ReadArrayHeaderBytes(bts) if err != nil { return } if cap(z.iv) >= int(zema) { z.iv = (z.iv)[:zema] } else { z.iv = make([]interval16, zema) } for zxpk := range z.iv { var zpez uint32 zpez, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zpez > 0 { zpez-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "start": z.iv[zxpk].start, bts, err = msgp.ReadUint16Bytes(bts) if err != nil { return } case "last": z.iv[zxpk].length, bts, err = msgp.ReadUint16Bytes(bts) z.iv[zxpk].length -= z.iv[zxpk].start if err != nil { return } default: bts, err = msgp.Skip(bts) if err != nil { return } } } } case "card": z.card, bts, err = msgp.ReadInt64Bytes(bts) if err != nil { return } default: bts, err = msgp.Skip(bts) if err != nil { return } } } o = bts return } // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *runContainer16) Msgsize() (s int) { s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size return } // Deprecated: DecodeMsg implements msgp.Decodable func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field var zqke uint32 zqke, err = dc.ReadMapHeader() if err != nil { return } for zqke > 0 { zqke-- field, err = dc.ReadMapKeyPtr() if err != nil { return } switch msgp.UnsafeString(field) { case "rc": if dc.IsNil() { err = dc.ReadNil() if err != nil { return } z.rc = nil } else { if z.rc == nil { z.rc = new(runContainer16) } err = z.rc.DecodeMsg(dc) if err != 
nil { return } } case "curIndex": z.curIndex, err = dc.ReadInt64() if err != nil { return } case "curPosInIndex": z.curPosInIndex, err = dc.ReadUint16() if err != nil { return } default: err = dc.Skip() if err != nil { return } } } return } // Deprecated: EncodeMsg implements msgp.Encodable func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 3 // write "rc" err = en.Append(0x83, 0xa2, 0x72, 0x63) if err != nil { return err } if z.rc == nil { err = en.WriteNil() if err != nil { return } } else { err = z.rc.EncodeMsg(en) if err != nil { return } } // write "curIndex" err = en.Append(0xa8, 0x63, 0x75, 0x72, 0x49, 0x6e, 0x64, 0x65, 0x78) if err != nil { return err } err = en.WriteInt64(z.curIndex) if err != nil { return } // write "curPosInIndex" err = en.Append(0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) if err != nil { return err } err = en.WriteUint16(z.curPosInIndex) if err != nil { return } return } // Deprecated: MarshalMsg implements msgp.Marshaler func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 3 // string "rc" o = append(o, 0x83, 0xa2, 0x72, 0x63) if z.rc == nil { o = msgp.AppendNil(o) } else { o, err = z.rc.MarshalMsg(o) if err != nil { return } } // string "curIndex" o = append(o, 0xa8, 0x63, 0x75, 0x72, 0x49, 0x6e, 0x64, 0x65, 0x78) o = msgp.AppendInt64(o, z.curIndex) // string "curPosInIndex" o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) o = msgp.AppendUint16(o, z.curPosInIndex) return } // Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field var zqyh uint32 zqyh, bts, err = msgp.ReadMapHeaderBytes(bts) if err != nil { return } for zqyh > 0 { zqyh-- field, bts, err = msgp.ReadMapKeyZC(bts) if err != nil { return } switch msgp.UnsafeString(field) { case "rc": if 
msgp.IsNil(bts) { bts, err = msgp.ReadNilBytes(bts) if err != nil { return } z.rc = nil } else { if z.rc == nil { z.rc = new(runContainer16) } bts, err = z.rc.UnmarshalMsg(bts) if err != nil { return } } case "curIndex": z.curIndex, bts, err = msgp.ReadInt64Bytes(bts) if err != nil { return } case "curPosInIndex": z.curPosInIndex, bts, err = msgp.ReadUint16Bytes(bts) if err != nil { return } default: bts, err = msgp.Skip(bts) if err != nil { return } } } o = bts return } // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *runIterator16) Msgsize() (s int) { s = 1 + 3 if z.rc == nil { s += msgp.NilSize } else { s += z.rc.Msgsize() } s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size return } // Deprecated: DecodeMsg implements msgp.Decodable func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) { var zjpj uint32 zjpj, err = dc.ReadArrayHeader() if err != nil { return } if cap((*z)) >= int(zjpj) { (*z) = (*z)[:zjpj] } else { (*z) = make(uint16Slice, zjpj) } for zywj := range *z { (*z)[zywj], err = dc.ReadUint16() if err != nil { return } } return } // Deprecated: EncodeMsg implements msgp.Encodable func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) { err = en.WriteArrayHeader(uint32(len(z))) if err != nil { return } for zzpf := range z { err = en.WriteUint16(z[zzpf]) if err != nil { return } } return } // Deprecated: MarshalMsg implements msgp.Marshaler func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) o = msgp.AppendArrayHeader(o, uint32(len(z))) for zzpf := range z { o = msgp.AppendUint16(o, z[zzpf]) } return } // Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) { var zgmo uint32 zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts) if err != nil { return } if cap((*z)) >= int(zgmo) { (*z) = (*z)[:zgmo] } else { (*z) = make(uint16Slice, zgmo) } for zrfe := range *z { 
(*z)[zrfe], bts, err = msgp.ReadUint16Bytes(bts) if err != nil { return } } o = bts return } // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z uint16Slice) Msgsize() (s int) { s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint16Size)) return } roaring-0.4.21/runcontainer_gen_test.go 0000664 0000000 0000000 00000025720 13542657257 0020213 0 ustar 00root root 0000000 0000000 package roaring // NOTE: THIS FILE WAS PRODUCED BY THE // MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) // DO NOT EDIT import ( "bytes" "testing" "github.com/tinylib/msgp/msgp" ) func TestMarshalUnmarshaladdHelper16(t *testing.T) { v := addHelper16{} bts, err := v.MarshalMsg(nil) if err != nil { t.Fatal(err) } left, err := v.UnmarshalMsg(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) } left, err = msgp.Skip(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after Skip(): %q", len(left), left) } } func BenchmarkMarshalMsgaddHelper16(b *testing.B) { v := addHelper16{} b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.MarshalMsg(nil) } } func BenchmarkAppendMsgaddHelper16(b *testing.B) { v := addHelper16{} bts := make([]byte, 0, v.Msgsize()) bts, _ = v.MarshalMsg(bts[0:0]) b.SetBytes(int64(len(bts))) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { bts, _ = v.MarshalMsg(bts[0:0]) } } func BenchmarkUnmarshaladdHelper16(b *testing.B) { v := addHelper16{} bts, _ := v.MarshalMsg(nil) b.ReportAllocs() b.SetBytes(int64(len(bts))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := v.UnmarshalMsg(bts) if err != nil { b.Fatal(err) } } } func TestEncodeDecodeaddHelper16(t *testing.T) { v := addHelper16{} var buf bytes.Buffer msgp.Encode(&buf, &v) m := v.Msgsize() if buf.Len() > m { t.Logf("WARNING: Msgsize() for %v is inaccurate", v) } vn := addHelper16{} err := msgp.Decode(&buf, &vn) if err != nil { 
t.Error(err) } buf.Reset() msgp.Encode(&buf, &v) err = msgp.NewReader(&buf).Skip() if err != nil { t.Error(err) } } func BenchmarkEncodeaddHelper16(b *testing.B) { v := addHelper16{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) en := msgp.NewWriter(msgp.Nowhere) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.EncodeMsg(en) } en.Flush() } func BenchmarkDecodeaddHelper16(b *testing.B) { v := addHelper16{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) rd := msgp.NewEndlessReader(buf.Bytes(), b) dc := msgp.NewReader(rd) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { err := v.DecodeMsg(dc) if err != nil { b.Fatal(err) } } } func TestMarshalUnmarshalinterval16(t *testing.T) { v := interval16{} bts, err := v.MarshalMsg(nil) if err != nil { t.Fatal(err) } left, err := v.UnmarshalMsg(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) } left, err = msgp.Skip(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after Skip(): %q", len(left), left) } } func BenchmarkMarshalMsginterval16(b *testing.B) { v := interval16{} b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.MarshalMsg(nil) } } func BenchmarkAppendMsginterval16(b *testing.B) { v := interval16{} bts := make([]byte, 0, v.Msgsize()) bts, _ = v.MarshalMsg(bts[0:0]) b.SetBytes(int64(len(bts))) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { bts, _ = v.MarshalMsg(bts[0:0]) } } func BenchmarkUnmarshalinterval16(b *testing.B) { v := interval16{} bts, _ := v.MarshalMsg(nil) b.ReportAllocs() b.SetBytes(int64(len(bts))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := v.UnmarshalMsg(bts) if err != nil { b.Fatal(err) } } } func TestEncodeDecodeinterval16(t *testing.T) { v := interval16{} var buf bytes.Buffer msgp.Encode(&buf, &v) m := v.Msgsize() if buf.Len() > m { t.Logf("WARNING: Msgsize() for %v is inaccurate", v) } vn 
:= interval16{} err := msgp.Decode(&buf, &vn) if err != nil { t.Error(err) } buf.Reset() msgp.Encode(&buf, &v) err = msgp.NewReader(&buf).Skip() if err != nil { t.Error(err) } } func BenchmarkEncodeinterval16(b *testing.B) { v := interval16{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) en := msgp.NewWriter(msgp.Nowhere) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.EncodeMsg(en) } en.Flush() } func BenchmarkDecodeinterval16(b *testing.B) { v := interval16{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) rd := msgp.NewEndlessReader(buf.Bytes(), b) dc := msgp.NewReader(rd) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { err := v.DecodeMsg(dc) if err != nil { b.Fatal(err) } } } func TestMarshalUnmarshalrunContainer16(t *testing.T) { v := runContainer16{} bts, err := v.MarshalMsg(nil) if err != nil { t.Fatal(err) } left, err := v.UnmarshalMsg(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) } left, err = msgp.Skip(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after Skip(): %q", len(left), left) } } func BenchmarkMarshalMsgrunContainer16(b *testing.B) { v := runContainer16{} b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.MarshalMsg(nil) } } func BenchmarkAppendMsgrunContainer16(b *testing.B) { v := runContainer16{} bts := make([]byte, 0, v.Msgsize()) bts, _ = v.MarshalMsg(bts[0:0]) b.SetBytes(int64(len(bts))) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { bts, _ = v.MarshalMsg(bts[0:0]) } } func BenchmarkUnmarshalrunContainer16(b *testing.B) { v := runContainer16{} bts, _ := v.MarshalMsg(nil) b.ReportAllocs() b.SetBytes(int64(len(bts))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := v.UnmarshalMsg(bts) if err != nil { b.Fatal(err) } } } func TestEncodeDecoderunContainer16(t *testing.T) { v := runContainer16{} var buf bytes.Buffer msgp.Encode(&buf, 
&v) m := v.Msgsize() if buf.Len() > m { t.Logf("WARNING: Msgsize() for %v is inaccurate", v) } vn := runContainer16{} err := msgp.Decode(&buf, &vn) if err != nil { t.Error(err) } buf.Reset() msgp.Encode(&buf, &v) err = msgp.NewReader(&buf).Skip() if err != nil { t.Error(err) } } func BenchmarkEncoderunContainer16(b *testing.B) { v := runContainer16{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) en := msgp.NewWriter(msgp.Nowhere) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.EncodeMsg(en) } en.Flush() } func BenchmarkDecoderunContainer16(b *testing.B) { v := runContainer16{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) rd := msgp.NewEndlessReader(buf.Bytes(), b) dc := msgp.NewReader(rd) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { err := v.DecodeMsg(dc) if err != nil { b.Fatal(err) } } } func TestMarshalUnmarshalrunIterator16(t *testing.T) { v := runIterator16{} bts, err := v.MarshalMsg(nil) if err != nil { t.Fatal(err) } left, err := v.UnmarshalMsg(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) } left, err = msgp.Skip(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after Skip(): %q", len(left), left) } } func BenchmarkMarshalMsgrunIterator16(b *testing.B) { v := runIterator16{} b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.MarshalMsg(nil) } } func BenchmarkAppendMsgrunIterator16(b *testing.B) { v := runIterator16{} bts := make([]byte, 0, v.Msgsize()) bts, _ = v.MarshalMsg(bts[0:0]) b.SetBytes(int64(len(bts))) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { bts, _ = v.MarshalMsg(bts[0:0]) } } func BenchmarkUnmarshalrunIterator16(b *testing.B) { v := runIterator16{} bts, _ := v.MarshalMsg(nil) b.ReportAllocs() b.SetBytes(int64(len(bts))) b.ResetTimer() for i := 0; i < b.N; i++ { _, err := v.UnmarshalMsg(bts) if err != nil { b.Fatal(err) } } } 
func TestEncodeDecoderunIterator16(t *testing.T) { v := runIterator16{} var buf bytes.Buffer msgp.Encode(&buf, &v) m := v.Msgsize() if buf.Len() > m { t.Logf("WARNING: Msgsize() for %v is inaccurate", v) } vn := runIterator16{} err := msgp.Decode(&buf, &vn) if err != nil { t.Error(err) } buf.Reset() msgp.Encode(&buf, &v) err = msgp.NewReader(&buf).Skip() if err != nil { t.Error(err) } } func BenchmarkEncoderunIterator16(b *testing.B) { v := runIterator16{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) en := msgp.NewWriter(msgp.Nowhere) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.EncodeMsg(en) } en.Flush() } func BenchmarkDecoderunIterator16(b *testing.B) { v := runIterator16{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) rd := msgp.NewEndlessReader(buf.Bytes(), b) dc := msgp.NewReader(rd) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { err := v.DecodeMsg(dc) if err != nil { b.Fatal(err) } } } func TestMarshalUnmarshaluint16Slice(t *testing.T) { v := uint16Slice{} bts, err := v.MarshalMsg(nil) if err != nil { t.Fatal(err) } left, err := v.UnmarshalMsg(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) } left, err = msgp.Skip(bts) if err != nil { t.Fatal(err) } if len(left) > 0 { t.Errorf("%d bytes left over after Skip(): %q", len(left), left) } } func BenchmarkMarshalMsguint16Slice(b *testing.B) { v := uint16Slice{} b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.MarshalMsg(nil) } } func BenchmarkAppendMsguint16Slice(b *testing.B) { v := uint16Slice{} bts := make([]byte, 0, v.Msgsize()) bts, _ = v.MarshalMsg(bts[0:0]) b.SetBytes(int64(len(bts))) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { bts, _ = v.MarshalMsg(bts[0:0]) } } func BenchmarkUnmarshaluint16Slice(b *testing.B) { v := uint16Slice{} bts, _ := v.MarshalMsg(nil) b.ReportAllocs() b.SetBytes(int64(len(bts))) b.ResetTimer() for 
i := 0; i < b.N; i++ { _, err := v.UnmarshalMsg(bts) if err != nil { b.Fatal(err) } } } func TestEncodeDecodeuint16Slice(t *testing.T) { v := uint16Slice{} var buf bytes.Buffer msgp.Encode(&buf, &v) m := v.Msgsize() if buf.Len() > m { t.Logf("WARNING: Msgsize() for %v is inaccurate", v) } vn := uint16Slice{} err := msgp.Decode(&buf, &vn) if err != nil { t.Error(err) } buf.Reset() msgp.Encode(&buf, &v) err = msgp.NewReader(&buf).Skip() if err != nil { t.Error(err) } } func BenchmarkEncodeuint16Slice(b *testing.B) { v := uint16Slice{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) en := msgp.NewWriter(msgp.Nowhere) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { v.EncodeMsg(en) } en.Flush() } func BenchmarkDecodeuint16Slice(b *testing.B) { v := uint16Slice{} var buf bytes.Buffer msgp.Encode(&buf, &v) b.SetBytes(int64(buf.Len())) rd := msgp.NewEndlessReader(buf.Bytes(), b) dc := msgp.NewReader(rd) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { err := v.DecodeMsg(dc) if err != nil { b.Fatal(err) } } } roaring-0.4.21/runcontainer_test.go 0000664 0000000 0000000 00000164556 13542657257 0017375 0 ustar 00root root 0000000 0000000 package roaring import ( "fmt" "math/rand" "sort" "strings" "testing" "github.com/stretchr/testify/assert" ) // trial is used in the randomized testing of runContainers type trial struct { n int percentFill float64 ntrial int // only in the union test // only subtract test percentDelete float64 // only in 067 randomized operations // we do this + 1 passes numRandomOpsPass int // allow sampling range control // only recent tests respect this. 
srang *interval16 } // canMerge, and mergeInterval16s should do what they say func TestRleInterval16s(t *testing.T) { a := newInterval16Range(0, 9) b := newInterval16Range(0, 1) report := sliceToString16([]interval16{a, b}) _ = report c := newInterval16Range(2, 4) d := newInterval16Range(2, 5) e := newInterval16Range(0, 4) f := newInterval16Range(9, 9) g := newInterval16Range(8, 9) h := newInterval16Range(5, 6) i := newInterval16Range(6, 6) aIb, empty := intersectInterval16s(a, b) assert.False(t, empty) assert.EqualValues(t, b, aIb) assert.True(t, canMerge16(b, c)) assert.True(t, canMerge16(c, b)) assert.True(t, canMerge16(a, h)) assert.True(t, canMerge16(d, e)) assert.True(t, canMerge16(f, g)) assert.True(t, canMerge16(c, h)) assert.False(t, canMerge16(b, h)) assert.False(t, canMerge16(h, b)) assert.False(t, canMerge16(c, i)) assert.EqualValues(t, e, mergeInterval16s(b, c)) assert.EqualValues(t, e, mergeInterval16s(c, b)) assert.EqualValues(t, h, mergeInterval16s(h, i)) assert.EqualValues(t, h, mergeInterval16s(i, h)) ////// start assert.EqualValues(t, newInterval16Range(0, 1), mergeInterval16s(newInterval16Range(0, 0), newInterval16Range(1, 1))) assert.EqualValues(t, newInterval16Range(0, 1), mergeInterval16s(newInterval16Range(1, 1), newInterval16Range(0, 0))) assert.EqualValues(t, newInterval16Range(0, 5), mergeInterval16s(newInterval16Range(0, 4), newInterval16Range(3, 5))) assert.EqualValues(t, newInterval16Range(0, 4), mergeInterval16s(newInterval16Range(0, 4), newInterval16Range(3, 4))) assert.EqualValues(t, newInterval16Range(0, 8), mergeInterval16s(newInterval16Range(1, 7), newInterval16Range(0, 8))) assert.EqualValues(t, newInterval16Range(0, 8), mergeInterval16s(newInterval16Range(1, 7), newInterval16Range(0, 8))) assert.Panics(t, func() { _ = mergeInterval16s(newInterval16Range(0, 0), newInterval16Range(2, 3)) }) } func TestRunOffset(t *testing.T) { v := newRunContainer16TakeOwnership([]interval16{newInterval16Range(34, 39)}) offtest := uint16(65500) w 
:= v.addOffset(offtest) w0card := w[0].getCardinality() w1card := w[1].getCardinality() if w0card+w1card != 6 { t.Errorf("Bogus cardinality.") } expected := []int{65534, 65535, 65536, 65537, 65538, 65539} wout := make([]int, len(expected)) for i := 0; i < w0card; i++ { wout[i] = w[0].selectInt(uint16(i)) } for i := 0; i < w1card; i++ { wout[i+w0card] = w[1].selectInt(uint16(i)) + 65536 } for i, x := range wout { if x != expected[i] { t.Errorf("found discrepancy %d!=%d", x, expected[i]) } } } func TestRleRunIterator16(t *testing.T) { t.Run("RunIterator16 unit tests for next, hasNext, and peekNext should pass", func(t *testing.T) { { rc := newRunContainer16() msg := rc.String() _ = msg assert.EqualValues(t, 0, rc.cardinality()) it := rc.newRunIterator16() assert.False(t, it.hasNext()) assert.Panics(t, func() { it.peekNext() }) assert.Panics(t, func() { it.next() }) } { rc := newRunContainer16TakeOwnership([]interval16{newInterval16Range(4, 4)}) assert.EqualValues(t, 1, rc.cardinality()) it := rc.newRunIterator16() assert.True(t, it.hasNext()) assert.EqualValues(t, uint16(4), it.peekNext()) assert.EqualValues(t, uint16(4), it.next()) } { rc := newRunContainer16CopyIv([]interval16{newInterval16Range(4, 9)}) assert.EqualValues(t, 6, rc.cardinality()) it := rc.newRunIterator16() assert.True(t, it.hasNext()) for i := 4; i < 10; i++ { assert.Equal(t, uint16(i), it.next()) } assert.False(t, it.hasNext()) } { // basic nextMany test rc := newRunContainer16CopyIv([]interval16{newInterval16Range(4, 9)}) assert.EqualValues(t, 6, rc.cardinality()) it := rc.newManyRunIterator16() buf := make([]uint32, 10) n := it.nextMany(0, buf) assert.Equal(t, 6, n) expected := []uint32{4, 5, 6, 7, 8, 9, 0, 0, 0, 0} for i, e := range expected { assert.Equal(t, e, buf[i]) } } { // nextMany with len(buf) == 0 rc := newRunContainer16CopyIv([]interval16{newInterval16Range(4, 9)}) assert.EqualValues(t, 6, rc.cardinality()) it := rc.newManyRunIterator16() var buf []uint32 n := it.nextMany(0, buf) 
assert.Equal(t, 0, n) } { // basic nextMany test across ranges rc := newRunContainer16CopyIv([]interval16{ newInterval16Range(4, 7), newInterval16Range(11, 13), newInterval16Range(18, 21)}) assert.EqualValues(t, 11, rc.cardinality()) it := rc.newManyRunIterator16() buf := make([]uint32, 15) n := it.nextMany(0, buf) assert.Equal(t, 11, n) expected := []uint32{4, 5, 6, 7, 11, 12, 13, 18, 19, 20, 21, 0, 0, 0, 0} for i, e := range expected { assert.Equal(t, e, buf[i]) } } { // basic nextMany test across ranges with different buffer sizes rc := newRunContainer16CopyIv([]interval16{ newInterval16Range(4, 7), newInterval16Range(11, 13), newInterval16Range(18, 21)}) expectedCard := 11 expectedVals := []uint32{4, 5, 6, 7, 11, 12, 13, 18, 19, 20, 21} hs := uint32(1 << 16) assert.EqualValues(t, expectedCard, rc.cardinality()) for bufSize := 2; bufSize < 15; bufSize++ { buf := make([]uint32, bufSize) seen := 0 it := rc.newManyRunIterator16() for n := it.nextMany(hs, buf); n != 0; n = it.nextMany(hs, buf) { // catch runaway iteration assert.LessOrEqual(t, seen+n, expectedCard) for i, e := range expectedVals[seen : seen+n] { assert.Equal(t, e+hs, buf[i]) } seen += n // if we have more values to return then we shouldn't leave empty slots in the buffer if seen < expectedCard { assert.Equal(t, bufSize, n) } } assert.Equal(t, expectedCard, seen) } } { // basic nextMany interaction with hasNext rc := newRunContainer16CopyIv([]interval16{newInterval16Range(4, 4)}) assert.EqualValues(t, 1, rc.cardinality()) it := rc.newManyRunIterator16() assert.True(t, it.hasNext()) buf := make([]uint32, 4) n := it.nextMany(0, buf) assert.Equal(t, 1, n) expected := []uint32{4, 0, 0, 0} for i, e := range expected { assert.Equal(t, e, buf[i]) } assert.False(t, it.hasNext()) buf = make([]uint32, 4) n = it.nextMany(0, buf) assert.Equal(t, 0, n) expected = []uint32{0, 0, 0, 0} for i, e := range expected { assert.Equal(t, e, buf[i]) } } { rc := newRunContainer16TakeOwnership([]interval16{ 
newInterval16Range(0, 0), newInterval16Range(2, 2), newInterval16Range(4, 4), }) rc1 := newRunContainer16TakeOwnership([]interval16{ newInterval16Range(6, 7), newInterval16Range(10, 11), newInterval16Range(MaxUint16, MaxUint16), }) rc = rc.union(rc1) assert.EqualValues(t, 8, rc.cardinality()) it := rc.newRunIterator16() assert.EqualValues(t, 0, it.next()) assert.EqualValues(t, 2, it.next()) assert.EqualValues(t, 4, it.next()) assert.EqualValues(t, 6, it.next()) assert.EqualValues(t, 7, it.next()) assert.EqualValues(t, 10, it.next()) assert.EqualValues(t, 11, it.next()) assert.EqualValues(t, MaxUint16, it.next()) assert.False(t, it.hasNext()) newInterval16Range(0, MaxUint16) rc2 := newRunContainer16TakeOwnership([]interval16{newInterval16Range(0, MaxUint16)}) rc2 = rc2.union(rc) assert.Equal(t, 1, rc2.numIntervals()) } }) } func TestRleRunReverseIterator16(t *testing.T) { t.Run("RunReverseIterator16 unit tests for next, hasNext, and peekNext should pass", func(t *testing.T) { { rc := newRunContainer16() it := rc.newRunReverseIterator16() assert.False(t, it.hasNext()) assert.Panics(t, func() { it.next() }) } { rc := newRunContainer16TakeOwnership([]interval16{newInterval16Range(0, 0)}) it := rc.newRunReverseIterator16() assert.True(t, it.hasNext()) assert.EqualValues(t, uint16(0), it.next()) assert.Panics(t, func() { it.next() }) assert.False(t, it.hasNext()) assert.Panics(t, func() { it.next() }) } { rc := newRunContainer16TakeOwnership([]interval16{newInterval16Range(4, 4)}) it := rc.newRunReverseIterator16() assert.True(t, it.hasNext()) assert.EqualValues(t, uint16(4), it.next()) assert.False(t, it.hasNext()) } { rc := newRunContainer16TakeOwnership([]interval16{newInterval16Range(MaxUint16, MaxUint16)}) it := rc.newRunReverseIterator16() assert.True(t, it.hasNext()) assert.EqualValues(t, uint16(MaxUint16), it.next()) assert.False(t, it.hasNext()) } { rc := newRunContainer16TakeOwnership([]interval16{newInterval16Range(4, 9)}) it := rc.newRunReverseIterator16() 
assert.True(t, it.hasNext()) for i := 9; i >= 4; i-- { assert.Equal(t, uint16(i), it.next()) if i > 4 { assert.True(t, it.hasNext()) } else if i == 4 { assert.False(t, it.hasNext()) } } assert.False(t, it.hasNext()) assert.Panics(t, func() { it.next() }) } { rc := newRunContainer16TakeOwnership([]interval16{ newInterval16Range(0, 0), newInterval16Range(2, 2), newInterval16Range(4, 4), newInterval16Range(6, 7), newInterval16Range(10, 12), newInterval16Range(MaxUint16, MaxUint16), }) it := rc.newRunReverseIterator16() assert.Equal(t, uint16(MaxUint16), it.next()) assert.Equal(t, uint16(12), it.next()) assert.Equal(t, uint16(11), it.next()) assert.Equal(t, uint16(10), it.next()) assert.Equal(t, uint16(7), it.next()) assert.Equal(t, uint16(6), it.next()) assert.Equal(t, uint16(4), it.next()) assert.Equal(t, uint16(2), it.next()) assert.Equal(t, uint16(0), it.next()) assert.Equal(t, false, it.hasNext()) assert.Panics(t, func() { it.next() }) } }) } func TestRleRunSearch16(t *testing.T) { t.Run("RunContainer16.search should respect the prior bounds we provide for efficiency of searching through a subset of the intervals", func(t *testing.T) { { vals := []uint16{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, MaxUint16 - 3, MaxUint16} exAt := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11} // expected at absent := []uint16{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, MaxUint16 - 2} rc := newRunContainer16FromVals(true, vals...) assert.EqualValues(t, 12, rc.cardinality()) var where int64 var present bool for i, v := range vals { where, present, _ = rc.search(int64(v), nil) assert.True(t, present) assert.EqualValues(t, exAt[i], where) } for i, v := range absent { where, present, _ = rc.search(int64(v), nil) assert.False(t, present) assert.EqualValues(t, i, where) } // delete the MaxUint16 so we can test // the behavior when searching near upper limit. 
assert.EqualValues(t, 12, rc.cardinality()) assert.Equal(t, 12, rc.numIntervals()) rc.removeKey(MaxUint16) assert.EqualValues(t, 11, rc.cardinality()) assert.Equal(t, 11, rc.numIntervals()) where, present, _ = rc.search(MaxUint16, nil) assert.False(t, present) assert.EqualValues(t, 10, where) var numCompares int where, present, numCompares = rc.search(MaxUint16, nil) assert.False(t, present) assert.EqualValues(t, 10, where) assert.EqualValues(t, 3, numCompares) opts := &searchOptions{ startIndex: 5, } where, present, numCompares = rc.search(MaxUint16, opts) assert.False(t, present) assert.EqualValues(t, 10, where) assert.EqualValues(t, 2, numCompares) where, present, _ = rc.search(MaxUint16-3, opts) assert.True(t, present) assert.EqualValues(t, 10, where) // with the bound in place, MaxUint16-3 should not be found opts.endxIndex = 10 where, present, _ = rc.search(MaxUint16-3, opts) assert.False(t, present) assert.EqualValues(t, 9, where) } }) } func TestRleIntersection16(t *testing.T) { t.Run("RunContainer16.intersect of two RunContainer16(s) should return their intersection", func(t *testing.T) { { vals := []uint16{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, MaxUint16 - 3, MaxUint16 - 1} a := newRunContainer16FromVals(true, vals[:5]...) b := newRunContainer16FromVals(true, vals[2:]...) 
assert.True(t, haveOverlap16(newInterval16Range(0, 2), newInterval16Range(2, 2))) assert.False(t, haveOverlap16(newInterval16Range(0, 2), newInterval16Range(3, 3))) isect := a.intersect(b) assert.EqualValues(t, 3, isect.cardinality()) assert.True(t, isect.contains(4)) assert.True(t, isect.contains(6)) assert.True(t, isect.contains(8)) newInterval16Range(0, MaxUint16) d := newRunContainer16TakeOwnership([]interval16{newInterval16Range(0, MaxUint16)}) isect = isect.intersect(d) assert.EqualValues(t, 3, isect.cardinality()) assert.True(t, isect.contains(4)) assert.True(t, isect.contains(6)) assert.True(t, isect.contains(8)) e := newRunContainer16TakeOwnership( []interval16{ newInterval16Range(2, 4), newInterval16Range(8, 9), newInterval16Range(14, 16), newInterval16Range(20, 22)}, ) f := newRunContainer16TakeOwnership( []interval16{ newInterval16Range(3, 18), newInterval16Range(22, 23)}, ) { isect = e.intersect(f) assert.EqualValues(t, 8, isect.cardinality()) assert.True(t, isect.contains(3)) assert.True(t, isect.contains(4)) assert.True(t, isect.contains(8)) assert.True(t, isect.contains(9)) assert.True(t, isect.contains(14)) assert.True(t, isect.contains(15)) assert.True(t, isect.contains(16)) assert.True(t, isect.contains(22)) } { // check for symmetry isect = f.intersect(e) assert.EqualValues(t, 8, isect.cardinality()) assert.True(t, isect.contains(3)) assert.True(t, isect.contains(4)) assert.True(t, isect.contains(8)) assert.True(t, isect.contains(9)) assert.True(t, isect.contains(14)) assert.True(t, isect.contains(15)) assert.True(t, isect.contains(16)) assert.True(t, isect.contains(22)) } } }) } func TestRleRandomIntersection16(t *testing.T) { t.Run("RunContainer.intersect of two RunContainers should return their intersection, and this should hold over randomized container content when compared to intersection done with hash maps", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 100, percentFill: .80, ntrial: 10}, {n: 1000, 
percentFill: .20, ntrial: 20}, {n: 10000, percentFill: .01, ntrial: 10}, {n: 1000, percentFill: .99, ntrial: 10}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) mb := make(map[int]bool) n := tr.n a := []uint16{} b := []uint16{} var first, second int draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true if i == 0 { first = r0 second = r0 + 1 a = append(a, uint16(second)) ma[second] = true } r1 := rand.Intn(n) b = append(b, uint16(r1)) mb[r1] = true } // print a; very likely it has dups sort.Sort(uint16Slice(a)) stringA := "" for i := range a { stringA += fmt.Sprintf("%v, ", a[i]) } // hash version of intersect: hashi := make(map[int]bool) for k := range ma { if mb[k] { hashi[k] = true } } // RunContainer's Intersect brle := newRunContainer16FromVals(false, b...) //arle := newRunContainer16FromVals(false, a...) // instead of the above line, create from array // get better test coverage: arr := newArrayContainerRange(int(first), int(second)) arle := newRunContainer16FromArray(arr) arle.set(false, a...) 
isect := arle.intersect(brle) //showHash("hashi", hashi) for k := range hashi { assert.True(t, isect.contains(uint16(k))) } assert.EqualValues(t, len(hashi), isect.cardinality()) } } for i := range trials { tester(trials[i]) } }) } func TestRleRandomUnion16(t *testing.T) { t.Run("RunContainer.union of two RunContainers should return their union, and this should hold over randomized container content when compared to union done with hash maps", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 100, percentFill: .80, ntrial: 10}, {n: 1000, percentFill: .20, ntrial: 20}, {n: 10000, percentFill: .01, ntrial: 10}, {n: 1000, percentFill: .99, ntrial: 10, percentDelete: .04}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) mb := make(map[int]bool) n := tr.n a := []uint16{} b := []uint16{} draw := int(float64(n) * tr.percentFill) numDel := int(float64(n) * tr.percentDelete) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true r1 := rand.Intn(n) b = append(b, uint16(r1)) mb[r1] = true } // hash version of union: hashu := make(map[int]bool) for k := range ma { hashu[k] = true } for k := range mb { hashu[k] = true } //showHash("hashu", hashu) // RunContainer's Union arle := newRunContainer16() for i := range a { arle.Add(a[i]) } brle := newRunContainer16() brle.set(false, b...) 
union := arle.union(brle) un := union.AsSlice() sort.Sort(uint16Slice(un)) for kk, v := range un { _ = kk assert.True(t, hashu[int(v)]) } for k := range hashu { assert.True(t, union.contains(uint16(k))) } assert.EqualValues(t, len(hashu), union.cardinality()) // do the deletes, exercising the remove functionality for i := 0; i < numDel; i++ { r1 := rand.Intn(len(a)) goner := a[r1] union.removeKey(goner) delete(hashu, int(goner)) } // verify the same as in the hashu assert.EqualValues(t, len(hashu), union.cardinality()) for k := range hashu { assert.True(t, union.contains(uint16(k))) } } } for i := range trials { tester(trials[i]) } }) } func TestRleAndOrXor16(t *testing.T) { t.Run("RunContainer And, Or, Xor tests", func(t *testing.T) { { rc := newRunContainer16TakeOwnership([]interval16{ newInterval16Range(0, 0), newInterval16Range(2, 2), newInterval16Range(4, 4), }) b0 := NewBitmap() b0.Add(2) b0.Add(6) b0.Add(8) and := rc.And(b0) or := rc.Or(b0) xor := rc.Xor(b0) assert.EqualValues(t, 1, and.GetCardinality()) assert.EqualValues(t, 5, or.GetCardinality()) assert.EqualValues(t, 4, xor.GetCardinality()) // test creating size 0 and 1 from array arr := newArrayContainerCapacity(0) empty := newRunContainer16FromArray(arr) onceler := newArrayContainerCapacity(1) onceler.content = append(onceler.content, uint16(0)) oneZero := newRunContainer16FromArray(onceler) assert.EqualValues(t, 0, empty.cardinality()) assert.EqualValues(t, 1, oneZero.cardinality()) assert.EqualValues(t, 0, empty.And(b0).GetCardinality()) assert.EqualValues(t, 3, empty.Or(b0).GetCardinality()) // exercise newRunContainer16FromVals() with 0 and 1 inputs. empty2 := newRunContainer16FromVals(false, []uint16{}...) assert.EqualValues(t, 0, empty2.cardinality()) one2 := newRunContainer16FromVals(false, []uint16{1}...) 
assert.EqualValues(t, 1, one2.cardinality()) } }) } func TestRlePanics16(t *testing.T) { t.Run("Some RunContainer calls/methods should panic if misused", func(t *testing.T) { // newRunContainer16FromVals assert.Panics(t, func() { newRunContainer16FromVals(true, 1, 0) }) arr := newArrayContainerRange(1, 3) arr.content = []uint16{2, 3, 3, 2, 1} assert.Panics(t, func() { newRunContainer16FromArray(arr) }) }) } func TestRleCoverageOddsAndEnds16(t *testing.T) { t.Run("Some RunContainer code paths that don't otherwise get coverage -- these should be tested to increase percentage of code coverage in testing", func(t *testing.T) { rc := &runContainer16{} assert.Equal(t, "runContainer16{}", rc.String()) rc.iv = make([]interval16, 1) rc.iv[0] = newInterval16Range(3, 4) assert.Equal(t, "runContainer16{0:[3, 4], }", rc.String()) a := newInterval16Range(5, 9) b := newInterval16Range(0, 1) c := newInterval16Range(1, 2) // intersectInterval16s(a, b interval16) isect, isEmpty := intersectInterval16s(a, b) assert.True(t, isEmpty) // [0,0] can't be trusted: assert.Equal(t, 0, isect.runlen()) isect, isEmpty = intersectInterval16s(b, c) assert.False(t, isEmpty) assert.EqualValues(t, 1, isect.runlen()) // runContainer16.union { ra := newRunContainer16FromVals(false, 4, 5) rb := newRunContainer16FromVals(false, 4, 6, 8, 9, 10) ra.union(rb) assert.EqualValues(t, 2, rb.indexOfIntervalAtOrAfter(4, 2)) assert.EqualValues(t, 2, rb.indexOfIntervalAtOrAfter(3, 2)) } // runContainer.intersect { ra := newRunContainer16() rb := newRunContainer16() assert.EqualValues(t, 0, ra.intersect(rb).cardinality()) } { ra := newRunContainer16FromVals(false, 1) rb := newRunContainer16FromVals(false, 4) assert.EqualValues(t, 0, ra.intersect(rb).cardinality()) } // runContainer.Add { ra := newRunContainer16FromVals(false, 1) rb := newRunContainer16FromVals(false, 4) assert.EqualValues(t, 1, ra.cardinality()) assert.EqualValues(t, 1, rb.cardinality()) ra.Add(5) assert.EqualValues(t, 2, ra.cardinality()) // 
newRunIterator16() empty := newRunContainer16() it := empty.newRunIterator16() assert.Panics(t, func() { it.next() }) it2 := ra.newRunIterator16() it2.curIndex = int64(len(it2.rc.iv)) assert.Panics(t, func() { it2.next() }) // runIterator16.peekNext() emptyIt := empty.newRunIterator16() assert.Panics(t, func() { emptyIt.peekNext() }) // newRunContainer16FromArray arr := newArrayContainerRange(1, 6) arr.content = []uint16{5, 5, 5, 6, 9} rc3 := newRunContainer16FromArray(arr) assert.EqualValues(t, 3, rc3.cardinality()) // runContainer16SerializedSizeInBytes // runContainer16.SerializedSizeInBytes _ = runContainer16SerializedSizeInBytes(3) _ = rc3.serializedSizeInBytes() // findNextIntervalThatIntersectsStartingFrom idx, _ := rc3.findNextIntervalThatIntersectsStartingFrom(0, 100) assert.EqualValues(t, 1, idx) // deleteAt / remove rc3.Add(10) rc3.removeKey(10) rc3.removeKey(9) assert.EqualValues(t, 2, rc3.cardinality()) rc3.Add(9) rc3.Add(10) rc3.Add(12) assert.EqualValues(t, 5, rc3.cardinality()) it3 := rc3.newRunIterator16() it3.next() it3.next() it3.next() it3.next() assert.EqualValues(t, 12, it3.peekNext()) assert.EqualValues(t, 12, it3.next()) } // runContainer16.equals { rc16 := newRunContainer16() assert.True(t, rc16.equals16(rc16)) rc16b := newRunContainer16() assert.True(t, rc16.equals16(rc16b)) rc16.Add(1) rc16b.Add(2) assert.False(t, rc16.equals16(rc16b)) } }) } func TestRleStoringMax16(t *testing.T) { t.Run("Storing the MaxUint16 should be possible, because it may be necessary to do so--users will assume that any valid uint16 should be storable. 
In particular the smaller 16-bit version will definitely expect full access to all bits.", func(t *testing.T) { rc := newRunContainer16() rc.Add(MaxUint16) assert.True(t, rc.contains(MaxUint16)) assert.EqualValues(t, 1, rc.cardinality()) rc.removeKey(MaxUint16) assert.False(t, rc.contains(MaxUint16)) assert.EqualValues(t, 0, rc.cardinality()) rc.set(false, MaxUint16-1, MaxUint16) assert.EqualValues(t, 2, rc.cardinality()) assert.True(t, rc.contains(MaxUint16-1)) assert.True(t, rc.contains(MaxUint16)) rc.removeKey(MaxUint16 - 1) assert.EqualValues(t, 1, rc.cardinality()) rc.removeKey(MaxUint16) assert.EqualValues(t, 0, rc.cardinality()) rc.set(false, MaxUint16-2, MaxUint16-1, MaxUint16) assert.EqualValues(t, 3, rc.cardinality()) assert.EqualValues(t, 1, rc.numIntervals()) rc.removeKey(MaxUint16 - 1) assert.EqualValues(t, 2, rc.numIntervals()) assert.EqualValues(t, 2, rc.cardinality()) }) } // go test -bench BenchmarkFromBitmap -run - func BenchmarkFromBitmap16(b *testing.B) { b.StopTimer() seed := int64(42) rand.Seed(seed) tr := trial{n: 10000, percentFill: .95, ntrial: 1, numRandomOpsPass: 100} _, _, bc := getRandomSameThreeContainers(tr) b.StartTimer() for j := 0; j < b.N; j++ { newRunContainer16FromBitmapContainer(bc) } } func TestRle16RandomIntersectAgainstOtherContainers010(t *testing.T) { t.Run("runContainer16 `and` operation against other container types should correctly do the intersection", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 100, percentFill: .95, ntrial: 1}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) mb := make(map[int]bool) n := tr.n a := []uint16{} b := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true r1 := rand.Intn(n) b = append(b, uint16(r1)) mb[r1] = true } //showArray16(a, "a") //showArray16(b, "b") // hash version of intersect: hashi := make(map[int]bool) for k := range ma { 
if mb[k] { hashi[k] = true } } // RunContainer's Intersect rc := newRunContainer16FromVals(false, a...) // vs bitmapContainer bc := newBitmapContainer() for _, bv := range b { bc.iadd(bv) } // vs arrayContainer ac := newArrayContainer() for _, bv := range b { ac.iadd(bv) } // vs runContainer rcb := newRunContainer16FromVals(false, b...) rcVsBcIsect := rc.and(bc) rcVsAcIsect := rc.and(ac) rcVsRcbIsect := rc.and(rcb) for k := range hashi { assert.True(t, rcVsBcIsect.contains(uint16(k))) assert.True(t, rcVsAcIsect.contains(uint16(k))) assert.True(t, rcVsRcbIsect.contains(uint16(k))) } assert.Equal(t, len(hashi), rcVsBcIsect.getCardinality()) assert.Equal(t, len(hashi), rcVsAcIsect.getCardinality()) assert.Equal(t, len(hashi), rcVsRcbIsect.getCardinality()) } } for i := range trials { tester(trials[i]) } }) } func TestRle16RandomUnionAgainstOtherContainers011(t *testing.T) { t.Run("runContainer16 `or` operation against other container types should correctly do the intersection", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 100, percentFill: .95, ntrial: 1}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) mb := make(map[int]bool) n := tr.n a := []uint16{} b := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true r1 := rand.Intn(n) b = append(b, uint16(r1)) mb[r1] = true } //showArray16(a, "a") //showArray16(b, "b") // hash version of union hashi := make(map[int]bool) for k := range ma { hashi[k] = true } for k := range mb { hashi[k] = true } // RunContainer's 'or' rc := newRunContainer16FromVals(false, a...) // vs bitmapContainer bc := newBitmapContainer() for _, bv := range b { bc.iadd(bv) } // vs arrayContainer ac := newArrayContainer() for _, bv := range b { ac.iadd(bv) } // vs runContainer rcb := newRunContainer16FromVals(false, b...) 
rcVsBcUnion := rc.or(bc) rcVsAcUnion := rc.or(ac) rcVsRcbUnion := rc.or(rcb) for k := range hashi { assert.True(t, rcVsBcUnion.contains(uint16(k))) assert.True(t, rcVsAcUnion.contains(uint16(k))) assert.True(t, rcVsRcbUnion.contains(uint16(k))) } assert.Equal(t, len(hashi), rcVsBcUnion.getCardinality()) assert.Equal(t, len(hashi), rcVsAcUnion.getCardinality()) assert.Equal(t, len(hashi), rcVsRcbUnion.getCardinality()) } } for i := range trials { tester(trials[i]) } }) } func TestRle16RandomInplaceUnionAgainstOtherContainers012(t *testing.T) { t.Run("runContainer16 `ior` inplace union operation against other container types should correctly do the intersection", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 10, percentFill: .95, ntrial: 1}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) mb := make(map[int]bool) n := tr.n a := []uint16{} b := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true r1 := rand.Intn(n) b = append(b, uint16(r1)) mb[r1] = true } //showArray16(a, "a") //showArray16(b, "b") // hash version of union hashi := make(map[int]bool) for k := range ma { hashi[k] = true } for k := range mb { hashi[k] = true } // RunContainer's 'or' rc := newRunContainer16FromVals(false, a...) rcVsBcUnion := rc.Clone() rcVsAcUnion := rc.Clone() rcVsRcbUnion := rc.Clone() // vs bitmapContainer bc := newBitmapContainer() for _, bv := range b { bc.iadd(bv) } // vs arrayContainer ac := newArrayContainer() for _, bv := range b { ac.iadd(bv) } // vs runContainer rcb := newRunContainer16FromVals(false, b...) 
rcVsBcUnion.ior(bc) rcVsAcUnion.ior(ac) rcVsRcbUnion.ior(rcb) for k := range hashi { assert.True(t, rcVsBcUnion.contains(uint16(k))) assert.True(t, rcVsAcUnion.contains(uint16(k))) assert.True(t, rcVsRcbUnion.contains(uint16(k))) } assert.Equal(t, len(hashi), rcVsBcUnion.getCardinality()) assert.Equal(t, len(hashi), rcVsAcUnion.getCardinality()) assert.Equal(t, len(hashi), rcVsRcbUnion.getCardinality()) } } for i := range trials { tester(trials[i]) } }) } func TestRle16RandomInplaceIntersectAgainstOtherContainers014(t *testing.T) { t.Run("runContainer16 `iand` inplace-and operation against other container types should correctly do the intersection", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 100, percentFill: .95, ntrial: 1}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) mb := make(map[int]bool) n := tr.n a := []uint16{} b := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true r1 := rand.Intn(n) b = append(b, uint16(r1)) mb[r1] = true } //showArray16(a, "a") //showArray16(b, "b") // hash version of intersect: hashi := make(map[int]bool) for k := range ma { if mb[k] { hashi[k] = true } } // RunContainer's Intersect rc := newRunContainer16FromVals(false, a...) // vs bitmapContainer bc := newBitmapContainer() for _, bv := range b { bc.iadd(bv) } // vs arrayContainer ac := newArrayContainer() for _, bv := range b { ac.iadd(bv) } // vs runContainer rcb := newRunContainer16FromVals(false, b...) 
var rcVsBcIsect container = rc.Clone() var rcVsAcIsect container = rc.Clone() var rcVsRcbIsect container = rc.Clone() rcVsBcIsect = rcVsBcIsect.iand(bc) rcVsAcIsect = rcVsAcIsect.iand(ac) rcVsRcbIsect = rcVsRcbIsect.iand(rcb) for k := range hashi { assert.True(t, rcVsBcIsect.contains(uint16(k))) assert.True(t, rcVsAcIsect.contains(uint16(k))) assert.True(t, rcVsRcbIsect.contains(uint16(k))) } assert.Equal(t, len(hashi), rcVsBcIsect.getCardinality()) assert.Equal(t, len(hashi), rcVsAcIsect.getCardinality()) assert.Equal(t, len(hashi), rcVsRcbIsect.getCardinality()) } } for i := range trials { tester(trials[i]) } }) } func TestRle16RemoveApi015(t *testing.T) { t.Run("runContainer16 `remove` (a minus b) should work", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 100, percentFill: .95, ntrial: 1}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) mb := make(map[int]bool) n := tr.n a := []uint16{} b := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true r1 := rand.Intn(n) b = append(b, uint16(r1)) mb[r1] = true } //showArray16(a, "a") //showArray16(b, "b") // hash version of remove: hashrm := make(map[int]bool) for k := range ma { hashrm[k] = true } for k := range mb { delete(hashrm, k) } // RunContainer's remove rc := newRunContainer16FromVals(false, a...) 
for k := range mb { rc.iremove(uint16(k)) } for k := range hashrm { assert.True(t, rc.contains(uint16(k))) } assert.Equal(t, len(hashrm), rc.getCardinality()) } } for i := range trials { tester(trials[i]) } }) } func showArray16(a []uint16, name string) { sort.Sort(uint16Slice(a)) stringA := "" for i := range a { stringA += fmt.Sprintf("%v, ", a[i]) } } func TestRle16RandomAndNot016(t *testing.T) { t.Run("runContainer16 `andNot` operation against other container types should correctly do the and-not operation", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 1000, percentFill: .95, ntrial: 2}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) mb := make(map[int]bool) n := tr.n a := []uint16{} b := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true r1 := rand.Intn(n) b = append(b, uint16(r1)) mb[r1] = true } //showArray16(a, "a") //showArray16(b, "b") // hash version of and-not hashi := make(map[int]bool) for k := range ma { hashi[k] = true } for k := range mb { delete(hashi, k) } // RunContainer's and-not rc := newRunContainer16FromVals(false, a...) // vs bitmapContainer bc := newBitmapContainer() for _, bv := range b { bc.iadd(bv) } // vs arrayContainer ac := newArrayContainer() for _, bv := range b { ac.iadd(bv) } // vs runContainer rcb := newRunContainer16FromVals(false, b...) 
rcVsBcAndnot := rc.andNot(bc) rcVsAcAndnot := rc.andNot(ac) rcVsRcbAndnot := rc.andNot(rcb) for k := range hashi { assert.True(t, rcVsBcAndnot.contains(uint16(k))) assert.True(t, rcVsAcAndnot.contains(uint16(k))) assert.True(t, rcVsRcbAndnot.contains(uint16(k))) } assert.Equal(t, len(hashi), rcVsBcAndnot.getCardinality()) assert.Equal(t, len(hashi), rcVsAcAndnot.getCardinality()) assert.Equal(t, len(hashi), rcVsRcbAndnot.getCardinality()) } } for i := range trials { tester(trials[i]) } }) } func TestRle16RandomInplaceAndNot017(t *testing.T) { t.Run("runContainer16 `iandNot` operation against other container types should correctly do the inplace-and-not operation", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 1000, percentFill: .95, ntrial: 2}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) mb := make(map[int]bool) n := tr.n a := []uint16{} b := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true r1 := rand.Intn(n) b = append(b, uint16(r1)) mb[r1] = true } //showArray16(a, "a") //showArray16(b, "b") // hash version of and-not hashi := make(map[int]bool) for k := range ma { hashi[k] = true } for k := range mb { delete(hashi, k) } // RunContainer's and-not rc := newRunContainer16FromVals(false, a...) // vs bitmapContainer bc := newBitmapContainer() for _, bv := range b { bc.iadd(bv) } // vs arrayContainer ac := newArrayContainer() for _, bv := range b { ac.iadd(bv) } // vs runContainer rcb := newRunContainer16FromVals(false, b...) 
rcVsBcIandnot := rc.Clone() rcVsAcIandnot := rc.Clone() rcVsRcbIandnot := rc.Clone() rcVsBcIandnot.iandNot(bc) rcVsAcIandnot.iandNot(ac) rcVsRcbIandnot.iandNot(rcb) for k := range hashi { assert.True(t, rcVsBcIandnot.contains(uint16(k))) assert.True(t, rcVsAcIandnot.contains(uint16(k))) assert.True(t, rcVsRcbIandnot.contains(uint16(k))) } assert.Equal(t, len(hashi), rcVsBcIandnot.getCardinality()) assert.Equal(t, len(hashi), rcVsAcIandnot.getCardinality()) assert.Equal(t, len(hashi), rcVsRcbIandnot.getCardinality()) } } for i := range trials { tester(trials[i]) } }) } func TestRle16InversionOfIntervals018(t *testing.T) { t.Run("runContainer `invert` operation should do a NOT on the set of intervals, in-place", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 1000, percentFill: .90, ntrial: 1}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) hashNotA := make(map[int]bool) n := tr.n a := []uint16{} // hashNotA will be NOT ma //for i := 0; i < n; i++ { for i := 0; i < MaxUint16+1; i++ { hashNotA[i] = true } draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true delete(hashNotA, r0) } // RunContainer's invert rc := newRunContainer16FromVals(false, a...) 
inv := rc.invert() assert.Equal(t, 1+MaxUint16-rc.cardinality(), inv.cardinality()) for k := 0; k < n; k++ { if hashNotA[k] { assert.True(t, inv.contains(uint16(k))) } } // skip for now, too big to do 2^16-1 assert.Equal(t, len(hashNotA), inv.getCardinality()) } } for i := range trials { tester(trials[i]) } }) } func TestRle16SubtractionOfIntervals019(t *testing.T) { t.Run("runContainer `subtract` operation removes an interval in-place", func(t *testing.T) { // basics i22 := newInterval16Range(2, 2) left, _ := i22.subtractInterval(i22) assert.EqualValues(t, 0, len(left)) v := newInterval16Range(1, 6) left, _ = v.subtractInterval(newInterval16Range(3, 4)) assert.EqualValues(t, 2, len(left)) assert.EqualValues(t, 1, left[0].start) assert.EqualValues(t, 2, left[0].last()) assert.EqualValues(t, 5, left[1].start) assert.EqualValues(t, 6, left[1].last()) v = newInterval16Range(1, 6) left, _ = v.subtractInterval(newInterval16Range(4, 10)) assert.EqualValues(t, 1, len(left)) assert.EqualValues(t, 1, left[0].start) assert.EqualValues(t, 3, left[0].last()) v = newInterval16Range(5, 10) left, _ = v.subtractInterval(newInterval16Range(0, 7)) assert.EqualValues(t, 1, len(left)) assert.EqualValues(t, 8, left[0].start) assert.EqualValues(t, 10, left[0].last()) seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 1000, percentFill: .90, ntrial: 1}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) mb := make(map[int]bool) n := tr.n a := []uint16{} b := []uint16{} // hashAminusB will be ma - mb hashAminusB := make(map[int]bool) draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true hashAminusB[r0] = true r1 := rand.Intn(n) b = append(b, uint16(r1)) mb[r1] = true } for k := range mb { delete(hashAminusB, k) } // RunContainer's subtract A - B rc := newRunContainer16FromVals(false, a...) rcb := newRunContainer16FromVals(false, b...) 
abkup := rc.Clone() it := rcb.newRunIterator16() for it.hasNext() { nx := it.next() rc.isubtract(newInterval16Range(nx, nx)) } // also check full interval subtraction for _, p := range rcb.iv { abkup.isubtract(p) } for k := range hashAminusB { assert.True(t, rc.contains(uint16(k))) assert.True(t, abkup.contains(uint16(k))) } assert.EqualValues(t, len(hashAminusB), rc.getCardinality()) assert.EqualValues(t, len(hashAminusB), abkup.getCardinality()) } } for i := range trials { tester(trials[i]) } }) } func TestRle16Rank020(t *testing.T) { v := container(newRunContainer16()) v = v.iaddReturnMinimized(10) v = v.iaddReturnMinimized(100) v = v.iaddReturnMinimized(1000) if v.getCardinality() != 3 { t.Errorf("Bogus cardinality.") } for i := 0; i <= arrayDefaultMaxSize; i++ { thisrank := v.rank(uint16(i)) if i < 10 { if thisrank != 0 { t.Errorf("At %d should be zero but is %d ", i, thisrank) } } else if i < 100 { if thisrank != 1 { t.Errorf("At %d should be zero but is %d ", i, thisrank) } } else if i < 1000 { if thisrank != 2 { t.Errorf("At %d should be zero but is %d ", i, thisrank) } } else { if thisrank != 3 { t.Errorf("At %d should be zero but is %d ", i, thisrank) } } } } func TestRle16NotAlsoKnownAsFlipRange021(t *testing.T) { t.Run("runContainer `Not` operation should flip the bits of a range on the new returned container", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 100, percentFill: .8, ntrial: 2}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { // what is the interval we are going to flip? 
ma := make(map[int]bool) flipped := make(map[int]bool) n := tr.n a := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true flipped[r0] = true } // pick an interval to flip begin := rand.Intn(n) last := rand.Intn(n) if last < begin { begin, last = last, begin } // do the flip on the hash `flipped` for i := begin; i <= last; i++ { if flipped[i] { delete(flipped, i) } else { flipped[i] = true } } // RunContainer's Not rc := newRunContainer16FromVals(false, a...) flp := rc.Not(begin, last+1) assert.EqualValues(t, len(flipped), flp.cardinality()) for k := 0; k < n; k++ { if flipped[k] { assert.True(t, flp.contains(uint16(k))) } else { assert.False(t, flp.contains(uint16(k))) } } assert.EqualValues(t, len(flipped), flp.getCardinality()) } } for i := range trials { tester(trials[i]) } }) } func TestRleEquals022(t *testing.T) { t.Run("runContainer `equals` should accurately compare contents against other container types", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 100, percentFill: .2, ntrial: 10}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) n := tr.n a := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true } rc := newRunContainer16FromVals(false, a...) 
// make bitmap and array versions: bc := newBitmapContainer() ac := newArrayContainer() for k := range ma { ac.iadd(uint16(k)) bc.iadd(uint16(k)) } // compare equals() across all three assert.True(t, rc.equals(ac)) assert.True(t, rc.equals(bc)) assert.True(t, ac.equals(rc)) assert.True(t, ac.equals(bc)) assert.True(t, bc.equals(ac)) assert.True(t, bc.equals(rc)) // and for good measure, check against the hash assert.EqualValues(t, len(ma), rc.getCardinality()) assert.EqualValues(t, len(ma), ac.getCardinality()) assert.EqualValues(t, len(ma), bc.getCardinality()) for k := range ma { assert.True(t, rc.contains(uint16(k))) assert.True(t, ac.contains(uint16(k))) assert.True(t, bc.contains(uint16(k))) } } } for i := range trials { tester(trials[i]) } }) } func TestRleIntersects023(t *testing.T) { t.Run("runContainer `intersects` query should work against any mix of container types", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 10, percentFill: .293, ntrial: 1000}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) mb := make(map[int]bool) n := tr.n a := []uint16{} b := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true r1 := rand.Intn(n) b = append(b, uint16(r1)) mb[r1] = true } // determine if they intersect from the maps isect := false for k := range ma { if mb[k] { isect = true break } } rcA := newRunContainer16FromVals(false, a...) rcB := newRunContainer16FromVals(false, b...) 
// make bitmap and array versions: bcA := newBitmapContainer() bcB := newBitmapContainer() acA := newArrayContainer() acB := newArrayContainer() for k := range ma { acA.iadd(uint16(k)) bcA.iadd(uint16(k)) } for k := range mb { acB.iadd(uint16(k)) bcB.iadd(uint16(k)) } // compare intersects() across all three // same type assert.Equal(t, isect, rcA.intersects(rcB)) assert.Equal(t, isect, acA.intersects(acB)) assert.Equal(t, isect, bcA.intersects(bcB)) // across types assert.Equal(t, isect, rcA.intersects(acB)) assert.Equal(t, isect, rcA.intersects(bcB)) assert.Equal(t, isect, acA.intersects(rcB)) assert.Equal(t, isect, acA.intersects(bcB)) assert.Equal(t, isect, bcA.intersects(acB)) assert.Equal(t, isect, bcA.intersects(rcB)) // and swap the call pattern, so we test B intersects A as well. // same type assert.Equal(t, isect, rcB.intersects(rcA)) assert.Equal(t, isect, acB.intersects(acA)) assert.Equal(t, isect, bcB.intersects(bcA)) // across types assert.Equal(t, isect, rcB.intersects(acA)) assert.Equal(t, isect, rcB.intersects(bcA)) assert.Equal(t, isect, acB.intersects(rcA)) assert.Equal(t, isect, acB.intersects(bcA)) assert.Equal(t, isect, bcB.intersects(acA)) assert.Equal(t, isect, bcB.intersects(rcA)) } } for i := range trials { tester(trials[i]) } }) } func TestRleToEfficientContainer027(t *testing.T) { t.Run("runContainer toEfficientContainer should return equivalent containers", func(t *testing.T) { seed := int64(42) rand.Seed(seed) // 4096 or fewer integers -> array typically trials := []trial{ {n: 8000, percentFill: .01, ntrial: 10}, {n: 8000, percentFill: .99, ntrial: 10}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) n := tr.n a := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true } rc := newRunContainer16FromVals(false, a...) 
c := rc.toEfficientContainer() assert.True(t, rc.equals(c)) } } for i := range trials { tester(trials[i]) } }) t.Run("runContainer toEfficientContainer should return an equivalent bitmap when that is efficient", func(t *testing.T) { a := []uint16{} // odd intergers should be smallest as a bitmap for i := 0; i < MaxUint16; i++ { if i%2 == 1 { a = append(a, uint16(i)) } } rc := newRunContainer16FromVals(false, a...) c := rc.toEfficientContainer() assert.True(t, rc.equals(c)) _, isBitmapContainer := c.(*bitmapContainer) assert.True(t, isBitmapContainer) }) } func TestRle16RandomFillLeastSignificant16bits029(t *testing.T) { t.Run("runContainer16.fillLeastSignificant16bits() should fill contents as expected, matching the same function on bitmap and array containers", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 100, percentFill: .95, ntrial: 1}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) n := tr.n a := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true } //showArray16(a, "a") // RunContainer rc := newRunContainer16FromVals(false, a...) 
// vs bitmapContainer bc := newBitmapContainer() for _, av := range a { bc.iadd(av) } // vs arrayContainer ac := newArrayContainer() for _, av := range a { ac.iadd(av) } acOut := make([]uint32, n+10) bcOut := make([]uint32, n+10) rcOut := make([]uint32, n+10) pos2 := 0 // see Bitmap.ToArray() for principal use hs := uint32(43) << 16 ac.fillLeastSignificant16bits(acOut, pos2, hs) bc.fillLeastSignificant16bits(bcOut, pos2, hs) rc.fillLeastSignificant16bits(rcOut, pos2, hs) assert.EqualValues(t, acOut, rcOut) assert.EqualValues(t, bcOut, rcOut) } } for i := range trials { tester(trials[i]) } }) } func TestRle16RandomGetShortIterator030(t *testing.T) { t.Run("runContainer16.getShortIterator should traverse the contents expected, matching the traversal of the bitmap and array containers", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 100, percentFill: .95, ntrial: 1}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) n := tr.n a := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true } //showArray16(a, "a") // RunContainer rc := newRunContainer16FromVals(false, a...) 
// vs bitmapContainer bc := newBitmapContainer() for _, av := range a { bc.iadd(av) } // vs arrayContainer ac := newArrayContainer() for _, av := range a { ac.iadd(av) } rit := rc.getShortIterator() ait := ac.getShortIterator() bit := bc.getShortIterator() for ait.hasNext() { rn := rit.next() an := ait.next() bn := bit.next() assert.Equal(t, an, rn) assert.Equal(t, bn, rn) } } } for i := range trials { tester(trials[i]) } }) } func TestRle16RandomIaddRangeIremoveRange031(t *testing.T) { t.Run("runContainer16.iaddRange and iremoveRange should add/remove contents as expected, matching the same operations on the bitmap and array containers and the hashmap pos control", func(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 101, percentFill: .9, ntrial: 10}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) n := tr.n a := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true } //showArray16(a, "a") // RunContainer rc := newRunContainer16FromVals(false, a...) 
// vs bitmapContainer bc := newBitmapContainer() for _, av := range a { bc.iadd(av) } // vs arrayContainer ac := newArrayContainer() for _, av := range a { ac.iadd(av) } // iaddRange and iRemoveRange : pick some distinct random endpoints a0 := rand.Intn(n) a1 := a0 for a1 == a0 { a1 = rand.Intn(n) } if a0 > a1 { a0, a1 = a1, a0 } r0 := rand.Intn(n) r1 := r0 for r1 == r0 { r1 = rand.Intn(n) } if r0 > r1 { r0, r1 = r1, r0 } // do the add for i := a0; i <= a1; i++ { ma[i] = true } // then the remove for i := r0; i <= r1; i++ { delete(ma, i) } rc.iaddRange(a0, a1+1) rc.iremoveRange(r0, r1+1) bc.iaddRange(a0, a1+1) bc.iremoveRange(r0, r1+1) ac.iaddRange(a0, a1+1) ac.iremoveRange(r0, r1+1) assert.EqualValues(t, len(ma), rc.getCardinality()) assert.Equal(t, ac.getCardinality(), rc.getCardinality()) assert.Equal(t, bc.getCardinality(), rc.getCardinality()) rit := rc.getShortIterator() ait := ac.getShortIterator() bit := bc.getShortIterator() for ait.hasNext() { rn := rit.next() an := ait.next() bn := bit.next() assert.Equal(t, an, rn) assert.Equal(t, bn, rn) } // verify againt the map for k := range ma { assert.True(t, rc.contains(uint16(k))) } // coverage for run16 method assert.Equal(t, 2+4*rc.numberOfRuns(), rc.serializedSizeInBytes()) } } for i := range trials { tester(trials[i]) } }) } func TestAllContainerMethodsAllContainerTypes065(t *testing.T) { t.Run("each of the container methods that takes two containers should handle all 3x3==9 possible ways of being called -- without panic", func(t *testing.T) { a := newArrayContainer() r := newRunContainer16() b := newBitmapContainer() arr := []container{a, r, b} for _, i := range arr { for _, j := range arr { i.and(j) i.iand(j) i.andNot(j) i.iandNot(j) i.xor(j) i.equals(j) i.or(j) i.ior(j) i.intersects(j) i.lazyOR(j) i.lazyIOR(j) } } }) } type twoCall func(r container) container type twofer struct { name string call twoCall cn container } func TestAllContainerMethodsAllContainerTypesWithData067(t *testing.T) { t.Run("each 
of the container methods that takes two containers should handle all 3x3==9 possible ways of being called -- and return results that agree with each other", func(t *testing.T) { //rleVerbose = true seed := int64(42) rand.Seed(seed) srang := newInterval16Range(MaxUint16-100, MaxUint16) trials := []trial{ {n: 100, percentFill: .7, ntrial: 1, numRandomOpsPass: 100}, {n: 100, percentFill: .7, ntrial: 1, numRandomOpsPass: 100, srang: &srang}} tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { a, r, b := getRandomSameThreeContainers(tr) a2, r2, b2 := getRandomSameThreeContainers(tr) receiver := []container{a, r, b} arg := []container{a2, r2, b2} callme := []twofer{} nCalls := 0 for k, c := range receiver { callme = append(callme, twofer{"and", c.and, c}) callme = append(callme, twofer{"iand", c.iand, c}) callme = append(callme, twofer{"ior", c.ior, c}) callme = append(callme, twofer{"lazyOR", c.lazyOR, c}) callme = append(callme, twofer{"lazyIOR", c.lazyIOR, c}) callme = append(callme, twofer{"or", c.or, c}) callme = append(callme, twofer{"xor", c.xor, c}) callme = append(callme, twofer{"andNot", c.andNot, c}) callme = append(callme, twofer{"iandNot", c.iandNot, c}) if k == 0 { nCalls = len(callme) } } for pass := 0; pass < tr.numRandomOpsPass+1; pass++ { for k := 0; k < nCalls; k++ { perm := getRandomPermutation(nCalls) kk := perm[k] c1 := callme[kk] // array receiver c2 := callme[kk+nCalls] // run receiver c3 := callme[kk+2*nCalls] // bitmap receiver if c1.name != c2.name { panic("internal logic error") } if c3.name != c2.name { panic("internal logic error") } for k2, a := range arg { if !c1.cn.equals(c2.cn) { panic("c1 not equal to c2") } if !c1.cn.equals(c3.cn) { panic("c1 not equal to c3") } res1 := c1.call(a) // array res2 := c2.call(a) // run res3 := c3.call(a) // bitmap z := c1.name // In-place operation are best effort // User should not assume the receiver is modified, returned container has to be used if strings.HasPrefix(z, "i") { c1.cn = res1 c2.cn 
= res2 c3.cn = res3 } if strings.HasPrefix(z, "lazy") { // on purpose, the lazy functions // do not scan to update their cardinality if asBc, isBc := res1.(*bitmapContainer); isBc { asBc.computeCardinality() } if asBc, isBc := res2.(*bitmapContainer); isBc { asBc.computeCardinality() } if asBc, isBc := res3.(*bitmapContainer); isBc { asBc.computeCardinality() } } // check for equality all ways... // excercising equals() calls too. if !res1.equals(res2) { panic(fmt.Sprintf("k:%v, k2:%v, res1 != res2,"+ " call is '%s'", k, k2, c1.name)) } if !res2.equals(res1) { panic(fmt.Sprintf("k:%v, k2:%v, res2 != res1,"+ " call is '%s'", k, k2, c1.name)) } if !res1.equals(res3) { panic(fmt.Sprintf("k:%v, k2:%v, res1 != res3,"+ " call is '%s'", k, k2, c1.name)) } if !res3.equals(res1) { panic(fmt.Sprintf("k:%v, k2:%v, res3 != res1,"+ " call is '%s'", k, k2, c1.name)) } if !res2.equals(res3) { panic(fmt.Sprintf("k:%v, k2:%v, res2 != res3,"+ " call is '%s'", k, k2, c1.name)) } if !res3.equals(res2) { panic(fmt.Sprintf("k:%v, k2:%v, res3 != res2,"+ " call is '%s'", k, k2, c1.name)) } } } // end k } // end pass } // end j } // end tester for i := range trials { tester(trials[i]) } }) } func TestRuntimeIteratorPeekNext(t *testing.T) { testContainerIteratorPeekNext(t, newRunContainer16()) } func TestRuntimeIteratorAdvance(t *testing.T) { testContainerIteratorAdvance(t, newRunContainer16()) } // go test -bench BenchmarkShortIteratorAdvance -run - func BenchmarkShortIteratorAdvanceRuntime(b *testing.B) { benchmarkContainerIteratorAdvance(b, newRunContainer16()) } // go test -bench BenchmarkShortIteratorNext -run - func BenchmarkShortIteratorNextRuntime(b *testing.B) { benchmarkContainerIteratorNext(b, newRunContainer16()) } // generate random contents, then return that same // logical content in three different container types func getRandomSameThreeContainers(tr trial) (*arrayContainer, *runContainer16, *bitmapContainer) { ma := make(map[int]bool) n := tr.n a := []uint16{} var samp 
interval16 if tr.srang != nil { samp = *tr.srang } else { if n-1 > MaxUint16 { panic(fmt.Errorf("n out of range: %v", n)) } samp.start = 0 samp.length = uint16(n - 2) } draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := int(samp.start) + rand.Intn(int(samp.runlen())) a = append(a, uint16(r0)) ma[r0] = true } rc := newRunContainer16FromVals(false, a...) // vs bitmapContainer bc := newBitmapContainerFromRun(rc) ac := rc.toArrayContainer() return ac, rc, bc } roaring-0.4.21/serialization.go 0000664 0000000 0000000 00000001456 13542657257 0016471 0 ustar 00root root 0000000 0000000 package roaring import ( "encoding/binary" "io" "github.com/tinylib/msgp/msgp" ) // writeTo for runContainer16 follows this // spec: https://github.com/RoaringBitmap/RoaringFormatSpec // func (b *runContainer16) writeTo(stream io.Writer) (int, error) { buf := make([]byte, 2+4*len(b.iv)) binary.LittleEndian.PutUint16(buf[0:], uint16(len(b.iv))) for i, v := range b.iv { binary.LittleEndian.PutUint16(buf[2+i*4:], v.start) binary.LittleEndian.PutUint16(buf[2+2+i*4:], v.length) } return stream.Write(buf) } func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) { bts, err := b.MarshalMsg(nil) if err != nil { return 0, err } return stream.Write(bts) } func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) { err := msgp.Decode(stream, b) return 0, err } roaring-0.4.21/serialization_generic.go 0000664 0000000 0000000 00000005614 13542657257 0020165 0 ustar 00root root 0000000 0000000 // +build !amd64,!386 appengine package roaring import ( "encoding/binary" "errors" "io" ) func (b *arrayContainer) writeTo(stream io.Writer) (int, error) { buf := make([]byte, 2*len(b.content)) for i, v := range b.content { base := i * 2 buf[base] = byte(v) buf[base+1] = byte(v >> 8) } return stream.Write(buf) } func (b *arrayContainer) readFrom(stream io.Reader) (int, error) { err := binary.Read(stream, binary.LittleEndian, b.content) if err != nil { return 0, 
err } return 2 * len(b.content), nil } func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) { if b.cardinality <= arrayDefaultMaxSize { return 0, errors.New("refusing to write bitmap container with cardinality of array container") } // Write set buf := make([]byte, 8*len(b.bitmap)) for i, v := range b.bitmap { base := i * 8 buf[base] = byte(v) buf[base+1] = byte(v >> 8) buf[base+2] = byte(v >> 16) buf[base+3] = byte(v >> 24) buf[base+4] = byte(v >> 32) buf[base+5] = byte(v >> 40) buf[base+6] = byte(v >> 48) buf[base+7] = byte(v >> 56) } return stream.Write(buf) } func (b *bitmapContainer) readFrom(stream io.Reader) (int, error) { err := binary.Read(stream, binary.LittleEndian, b.bitmap) if err != nil { return 0, err } b.computeCardinality() return 8 * len(b.bitmap), nil } func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { by := make([]byte, len(bc.bitmap)*8) for i := range bc.bitmap { binary.LittleEndian.PutUint64(by[i*8:], bc.bitmap[i]) } return by } func uint64SliceAsByteSlice(slice []uint64) []byte { by := make([]byte, len(slice)*8) for i, v := range slice { binary.LittleEndian.PutUint64(by[i*8:], v) } return by } func uint16SliceAsByteSlice(slice []uint16) []byte { by := make([]byte, len(slice)*2) for i, v := range slice { binary.LittleEndian.PutUint16(by[i*2:], v) } return by } func byteSliceAsUint16Slice(slice []byte) []uint16 { if len(slice)%2 != 0 { panic("Slice size should be divisible by 2") } b := make([]uint16, len(slice)/2) for i := range b { b[i] = binary.LittleEndian.Uint16(slice[2*i:]) } return b } func byteSliceAsUint64Slice(slice []byte) []uint64 { if len(slice)%8 != 0 { panic("Slice size should be divisible by 8") } b := make([]uint64, len(slice)/8) for i := range b { b[i] = binary.LittleEndian.Uint64(slice[8*i:]) } return b } // Converts a byte slice to a interval16 slice. 
// The function assumes that the slice byte buffer is run container data // encoded according to Roaring Format Spec func byteSliceAsInterval16Slice(byteSlice []byte) []interval16 { if len(byteSlice)%4 != 0 { panic("Slice size should be divisible by 4") } intervalSlice := make([]interval16, len(byteSlice)/4) for i := range intervalSlice { intervalSlice[i] = interval16{ start: binary.LittleEndian.Uint16(byteSlice[i*4:]), length: binary.LittleEndian.Uint16(byteSlice[i*4+2:]), } } return intervalSlice } roaring-0.4.21/serialization_littleendian.go 0000664 0000000 0000000 00000007236 13542657257 0021227 0 ustar 00root root 0000000 0000000 // +build 386 amd64,!appengine package roaring import ( "errors" "io" "reflect" "runtime" "unsafe" ) func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) { buf := uint16SliceAsByteSlice(ac.content) return stream.Write(buf) } func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) { if bc.cardinality <= arrayDefaultMaxSize { return 0, errors.New("refusing to write bitmap container with cardinality of array container") } buf := uint64SliceAsByteSlice(bc.bitmap) return stream.Write(buf) } func uint64SliceAsByteSlice(slice []uint64) []byte { // make a new slice header header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) // update its capacity and length header.Len *= 8 header.Cap *= 8 // instantiate result and use KeepAlive so data isn't unmapped. result := *(*[]byte)(unsafe.Pointer(&header)) runtime.KeepAlive(&slice) // return it return result } func uint16SliceAsByteSlice(slice []uint16) []byte { // make a new slice header header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) // update its capacity and length header.Len *= 2 header.Cap *= 2 // instantiate result and use KeepAlive so data isn't unmapped. 
result := *(*[]byte)(unsafe.Pointer(&header)) runtime.KeepAlive(&slice) // return it return result } func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { return uint64SliceAsByteSlice(bc.bitmap) } // Deserialization code follows //// // These methods (byteSliceAsUint16Slice,...) do not make copies, // they are pointer-based (unsafe). The caller is responsible to // ensure that the input slice does not get garbage collected, deleted // or modified while you hold the returned slince. //// func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder if len(slice)%2 != 0 { panic("Slice size should be divisible by 2") } // reference: https://go101.org/article/unsafe.html // make a new slice header bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) // transfer the data from the given slice to a new variable (our result) rHeader.Data = bHeader.Data rHeader.Len = bHeader.Len / 2 rHeader.Cap = bHeader.Cap / 2 // instantiate result and use KeepAlive so data isn't unmapped. runtime.KeepAlive(&slice) // it is still crucial, GC can free it) // return result return } func byteSliceAsUint64Slice(slice []byte) (result []uint64) { if len(slice)%8 != 0 { panic("Slice size should be divisible by 8") } // reference: https://go101.org/article/unsafe.html // make a new slice header bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) // transfer the data from the given slice to a new variable (our result) rHeader.Data = bHeader.Data rHeader.Len = bHeader.Len / 8 rHeader.Cap = bHeader.Cap / 8 // instantiate result and use KeepAlive so data isn't unmapped. 
runtime.KeepAlive(&slice) // it is still crucial, GC can free it) // return result return } func byteSliceAsInterval16Slice(slice []byte) (result []interval16) { if len(slice)%4 != 0 { panic("Slice size should be divisible by 4") } // reference: https://go101.org/article/unsafe.html // make a new slice header bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) // transfer the data from the given slice to a new variable (our result) rHeader.Data = bHeader.Data rHeader.Len = bHeader.Len / 4 rHeader.Cap = bHeader.Cap / 4 // instantiate result and use KeepAlive so data isn't unmapped. runtime.KeepAlive(&slice) // it is still crucial, GC can free it) // return result return } roaring-0.4.21/serialization_test.go 0000664 0000000 0000000 00000041462 13542657257 0017531 0 ustar 00root root 0000000 0000000 package roaring // to run just these tests: go test -run TestSerialization* import ( "bytes" "encoding/binary" "encoding/gob" "fmt" "io/ioutil" "math/rand" "os" "path/filepath" "runtime" "testing" "github.com/stretchr/testify/assert" ) func TestSerializationOfEmptyBitmap(t *testing.T) { rb := NewBitmap() buf := &bytes.Buffer{} _, err := rb.WriteTo(buf) assert.NoError(t, err) assert.EqualValues(t, buf.Len(), rb.GetSerializedSizeInBytes()) newrb := NewBitmap() _, err = newrb.ReadFrom(buf) assert.NoError(t, err) assert.True(t, rb.Equals(newrb)) } func TestBase64_036(t *testing.T) { rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000) bstr, _ := rb.ToBase64() assert.NotEmpty(t, bstr) newrb := NewBitmap() _, err := newrb.FromBase64(bstr) assert.NoError(t, err) assert.True(t, rb.Equals(newrb)) } func TestSerializationBasic037(t *testing.T) { rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000) buf := &bytes.Buffer{} _, err := rb.WriteTo(buf) assert.NoError(t, err) assert.EqualValues(t, buf.Len(), rb.GetSerializedSizeInBytes()) newrb := NewBitmap() _, err = newrb.ReadFrom(buf) assert.NoError(t, err) assert.True(t, rb.Equals(newrb)) } func 
TestSerializationToFile038(t *testing.T) { rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000) fname := "myfile.bin" fout, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0660) assert.NoError(t, err) var l int64 l, err = rb.WriteTo(fout) assert.NoError(t, err) assert.EqualValues(t, l, rb.GetSerializedSizeInBytes()) fout.Close() newrb := NewBitmap() fin, err := os.Open(fname) assert.NoError(t, err) defer func() { fin.Close() assert.NoError(t, os.Remove(fname)) }() _, _ = newrb.ReadFrom(fin) assert.True(t, rb.Equals(newrb)) } func TestSerializationReadRunsFromFile039(t *testing.T) { fn := "testdata/bitmapwithruns.bin" by, err := ioutil.ReadFile(fn) assert.NoError(t, err) newrb := NewBitmap() _, err = newrb.ReadFrom(bytes.NewBuffer(by)) assert.NoError(t, err) } func TestSerializationBasic4WriteAndReadFile040(t *testing.T) { fname := "testdata/all3.classic" rb := NewBitmap() for k := uint32(0); k < 100000; k += 1000 { rb.Add(k) } for k := uint32(100000); k < 200000; k++ { rb.Add(3 * k) } for k := uint32(700000); k < 800000; k++ { rb.Add(k) } rb.highlowcontainer.runOptimize() fout, err := os.Create(fname) assert.NoError(t, err) var l int64 l, err = rb.WriteTo(fout) assert.NoError(t, err) assert.EqualValues(t, l, rb.GetSerializedSizeInBytes()) fout.Close() fin, err := os.Open(fname) assert.NoError(t, err) defer fin.Close() newrb := NewBitmap() _, err = newrb.ReadFrom(fin) assert.NoError(t, err) assert.True(t, rb.Equals(newrb)) } func TestSerializationFromJava051(t *testing.T) { fname := "testdata/bitmapwithoutruns.bin" newrb := NewBitmap() fin, err := os.Open(fname) assert.NoError(t, err) defer func() { fin.Close() }() _, _ = newrb.ReadFrom(fin) fmt.Println(newrb.GetCardinality()) rb := NewBitmap() for k := uint32(0); k < 100000; k += 1000 { rb.Add(k) } for k := uint32(100000); k < 200000; k++ { rb.Add(3 * k) } for k := uint32(700000); k < 800000; k++ { rb.Add(k) } assert.True(t, rb.Equals(newrb)) } func TestSerializationFromJavaWithRuns052(t *testing.T) { fname := 
"testdata/bitmapwithruns.bin" newrb := NewBitmap() fin, err := os.Open(fname) assert.NoError(t, err) defer func() { fin.Close() }() _, _ = newrb.ReadFrom(fin) rb := NewBitmap() for k := uint32(0); k < 100000; k += 1000 { rb.Add(k) } for k := uint32(100000); k < 200000; k++ { rb.Add(3 * k) } for k := uint32(700000); k < 800000; k++ { rb.Add(k) } assert.True(t, rb.Equals(newrb)) } func TestSerializationBasic2_041(t *testing.T) { rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000, 10000, 100000, 1000000) buf := &bytes.Buffer{} sz := rb.GetSerializedSizeInBytes() ub := BoundSerializedSizeInBytes(rb.GetCardinality(), 1000001) assert.False(t, sz > ub+10) l := int(rb.GetSerializedSizeInBytes()) _, err := rb.WriteTo(buf) assert.NoError(t, err) assert.Equal(t, l, buf.Len()) newrb := NewBitmap() _, err = newrb.ReadFrom(buf) assert.NoError(t, err) assert.True(t, rb.Equals(newrb)) } // roaringarray.writeTo and .readFrom should serialize and unserialize when containing all 3 container types func TestSerializationBasic3_042(t *testing.T) { rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000, 10000, 100000, 1000000) for i := 5000000; i < 5000000+2*(1<<16); i++ { rb.AddInt(i) } // confirm all three types present var bc, ac, rc bool for _, v := range rb.highlowcontainer.containers { switch cn := v.(type) { case *bitmapContainer: bc = true case *arrayContainer: ac = true case *runContainer16: rc = true default: panic(fmt.Errorf("Unrecognized container implementation: %T", cn)) } } assert.True(t, bc, "no bitmapContainer found, change your test input so we test all three!") assert.True(t, ac, "no arrayContainer found, change your test input so we test all three!") assert.True(t, rc, "no runContainer16 found, change your test input so we test all three!") var buf bytes.Buffer _, err := rb.WriteTo(&buf) assert.NoError(t, err) assert.EqualValues(t, buf.Len(), rb.GetSerializedSizeInBytes()) newrb := NewBitmap() _, err = newrb.ReadFrom(&buf) assert.NoError(t, err) assert.Equal(t, rb.GetCardinality(), 
newrb.GetCardinality()) assert.True(t, newrb.Equals(rb)) } func TestGobcoding043(t *testing.T) { rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000) buf := new(bytes.Buffer) encoder := gob.NewEncoder(buf) err := encoder.Encode(rb) assert.NoError(t, err) var b Bitmap decoder := gob.NewDecoder(buf) err = decoder.Decode(&b) assert.NoError(t, err) assert.True(t, b.Equals(rb)) } // runContainer writeTo and readFrom should return logically equivalent containers func TestSerializationRunContainerMsgpack028(t *testing.T) { seed := int64(42) rand.Seed(seed) trials := []trial{ {n: 10, percentFill: .2, ntrial: 10}, {n: 10, percentFill: .8, ntrial: 10}, {n: 10, percentFill: .50, ntrial: 10}, } tester := func(tr trial) { for j := 0; j < tr.ntrial; j++ { ma := make(map[int]bool) n := tr.n a := []uint16{} draw := int(float64(n) * tr.percentFill) for i := 0; i < draw; i++ { r0 := rand.Intn(n) a = append(a, uint16(r0)) ma[r0] = true } orig := newRunContainer16FromVals(false, a...) // serialize var buf bytes.Buffer _, err := orig.writeToMsgpack(&buf) if err != nil { panic(err) } // deserialize restored := &runContainer16{} _, err = restored.readFromMsgpack(&buf) if err != nil { panic(err) } // and compare assert.True(t, restored.equals(orig)) } } for i := range trials { tester(trials[i]) } } //roaringarray.writeToMsgpack and .readFromMsgpack should serialize and unserialize when containing all 3 container types func TestSerializationBasicMsgpack035(t *testing.T) { rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000, 10000, 100000, 1000000) for i := 5000000; i < 5000000+2*(1<<16); i++ { rb.AddInt(i) } // confirm all three types present var bc, ac, rc bool for _, v := range rb.highlowcontainer.containers { switch cn := v.(type) { case *bitmapContainer: bc = true assert.Equal(t, bitmapContype, cn.containerType()) case *arrayContainer: ac = true assert.Equal(t, arrayContype, cn.containerType()) case *runContainer16: rc = true assert.Equal(t, run16Contype, cn.containerType()) default: 
panic(fmt.Errorf("Unrecognized container implementation: %T", cn)) } } assert.True(t, bc, "no bitmapContainer found, change your test input so we test all three!") assert.True(t, ac, "no arrayContainer found, change your test input so we test all three!") assert.True(t, rc, "no runContainer16 found, change your test input so we test all three!") var buf bytes.Buffer _, err := rb.WriteToMsgpack(&buf) assert.NoError(t, err) newrb := NewBitmap() _, err = newrb.ReadFromMsgpack(&buf) assert.NoError(t, err) assert.Equal(t, rb.GetCardinality(), newrb.GetCardinality()) assert.True(t, newrb.Equals(rb)) } func TestByteSliceAsUint16Slice(t *testing.T) { t.Run("valid slice", func(t *testing.T) { expectedSize := 2 slice := make([]byte, 4) binary.LittleEndian.PutUint16(slice, 42) binary.LittleEndian.PutUint16(slice[2:], 43) uint16Slice := byteSliceAsUint16Slice(slice) assert.Equal(t, expectedSize, len(uint16Slice)) assert.Equal(t, expectedSize, cap(uint16Slice)) assert.False(t, uint16Slice[0] != 42 || uint16Slice[1] != 43) }) t.Run("inlined", func(t *testing.T) { first, second := singleSliceInArray() t.Logf("received %v %v", first, second[0]) if !first.Equals(second[0]) { t.Errorf("inline fail %v %v", first, second[0]) } }) t.Run("empty slice", func(t *testing.T) { slice := make([]byte, 0, 0) uint16Slice := byteSliceAsUint16Slice(slice) assert.Equal(t, 0, len(uint16Slice)) assert.Equal(t, 0, cap(uint16Slice)) }) t.Run("invalid slice size", func(t *testing.T) { slice := make([]byte, 1, 1) assert.Panics(t, func() { byteSliceAsUint16Slice(slice) }) }) } func singleSliceInArray() (*Bitmap, []*Bitmap) { firstSlice := singleSlice() containerSlice := make([]*Bitmap, 0) secondContainer := singleSlice() containerSlice = append(containerSlice, secondContainer) return firstSlice, containerSlice } func singleSlice() *Bitmap { slice := make([]byte, 2) return &Bitmap{highlowcontainer:roaringArray{keys: []uint16{0}, containers: []container{&arrayContainer{ byteSliceAsUint16Slice(slice)}}}} } 
func TestByteSliceAsUint64Slice(t *testing.T) { t.Run("valid slice", func(t *testing.T) { expectedSize := 2 slice := make([]byte, 16) binary.LittleEndian.PutUint64(slice, 42) binary.LittleEndian.PutUint64(slice[8:], 43) uint64Slice := byteSliceAsUint64Slice(slice) assert.Equal(t, expectedSize, len(uint64Slice)) assert.Equal(t, expectedSize, cap(uint64Slice)) assert.False(t, uint64Slice[0] != 42 || uint64Slice[1] != 43) }) t.Run("empty slice", func(t *testing.T) { slice := make([]byte, 0, 0) uint64Slice := byteSliceAsUint64Slice(slice) assert.Equal(t, 0, len(uint64Slice)) assert.Equal(t, 0, cap(uint64Slice)) }) t.Run("invalid slice size", func(t *testing.T) { slice := make([]byte, 1, 1) assert.Panics(t, func() { byteSliceAsUint64Slice(slice) }) }) } func TestByteSliceAsInterval16Slice(t *testing.T) { t.Run("valid slice", func(t *testing.T) { expectedSize := 2 slice := make([]byte, 8) binary.LittleEndian.PutUint16(slice, 10) binary.LittleEndian.PutUint16(slice[2:], 2) binary.LittleEndian.PutUint16(slice[4:], 20) binary.LittleEndian.PutUint16(slice[6:], 2) intervalSlice := byteSliceAsInterval16Slice(slice) assert.Equal(t, expectedSize, len(intervalSlice)) assert.Equal(t, expectedSize, cap(intervalSlice)) i1 := newInterval16Range(10, 12) i2 := newInterval16Range(20, 22) assert.False(t, intervalSlice[0] != i1 || intervalSlice[1] != i2) }) t.Run("empty slice", func(t *testing.T) { slice := make([]byte, 0, 0) intervalSlice := byteSliceAsInterval16Slice(slice) assert.Equal(t, 0, len(intervalSlice)) assert.Equal(t, 0, cap(intervalSlice)) }) t.Run("invalid slice length", func(t *testing.T) { slice := make([]byte, 1, 1) assert.Panics(t, func() { byteSliceAsInterval16Slice(slice) }) }) } func TestBitmap_FromBuffer(t *testing.T) { t.Run("empty bitmap", func(t *testing.T) { rb := NewBitmap() buf := &bytes.Buffer{} _, err := rb.WriteTo(buf) assert.NoError(t, err) assert.EqualValues(t, buf.Len(), rb.GetSerializedSizeInBytes()) newRb := NewBitmap() newRb.FromBuffer(buf.Bytes()) 
assert.NoError(t, err) assert.True(t, rb.Equals(newRb)) }) t.Run("basic bitmap of 7 elements", func(t *testing.T) { rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000) buf := &bytes.Buffer{} _, err := rb.WriteTo(buf) assert.NoError(t, err) newRb := NewBitmap() _, err = newRb.FromBuffer(buf.Bytes()) assert.NoError(t, err) assert.True(t, rb.Equals(newRb)) }) t.Run("bitmap with runs", func(t *testing.T) { file := "testdata/bitmapwithruns.bin" buf, err := ioutil.ReadFile(file) assert.NoError(t, err) rb := NewBitmap() _, err = rb.FromBuffer(buf) assert.NoError(t, err) assert.EqualValues(t, 3, rb.Stats().RunContainers) assert.EqualValues(t, 11, rb.Stats().Containers) }) t.Run("bitmap without runs", func(t *testing.T) { fn := "testdata/bitmapwithruns.bin" buf, err := ioutil.ReadFile(fn) assert.NoError(t, err) rb := NewBitmap() _, err = rb.FromBuffer(buf) assert.NoError(t, err) }) // all3.classic somehow created by other tests. t.Run("all3.classic bitmap", func(t *testing.T) { file := "testdata/all3.classic" buf, err := ioutil.ReadFile(file) assert.NoError(t, err) rb := NewBitmap() _, err = rb.FromBuffer(buf) assert.NoError(t, err) }) t.Run("testdata/bitmapwithruns.bin bitmap Ops", func(t *testing.T) { file := "testdata/bitmapwithruns.bin" buf, err := ioutil.ReadFile(file) assert.NoError(t, err) empt := NewBitmap() rb1 := NewBitmap() _, err = rb1.FromBuffer(buf) assert.NoError(t, err) rb2 := NewBitmap() _, err = rb2.FromBuffer(buf) assert.NoError(t, err) rbor := Or(rb1, rb2) rbfastor := FastOr(rb1, rb2) rband := And(rb1, rb2) rbxor := Xor(rb1, rb2) rbandnot := AndNot(rb1, rb2) assert.True(t, rbor.Equals(rb1)) assert.True(t, rbfastor.Equals(rbor)) assert.True(t, rband.Equals(rb1)) assert.True(t, rbxor.Equals(empt)) assert.True(t, rbandnot.Equals(empt)) }) t.Run("marking all containers as requiring COW", func(t *testing.T) { file := "testdata/bitmapwithruns.bin" buf, err := ioutil.ReadFile(file) assert.NoError(t, err) rb := NewBitmap() _, err = rb.FromBuffer(buf) assert.NoError(t, err) 
for i, cow := range rb.highlowcontainer.needCopyOnWrite { assert.Truef(t, cow, "Container at pos %d was not marked as needs-copy-on-write", i) } }) } func TestSerializationCrashers(t *testing.T) { crashers, err := filepath.Glob("testdata/crash*") assert.NoError(t, err) for _, crasher := range crashers { data, err := ioutil.ReadFile(crasher) assert.NoError(t, err) // take a copy in case the stream is modified during unpacking attempt orig := make([]byte, len(data)) copy(orig, data) _, err = NewBitmap().FromBuffer(data) assert.Error(t, err) // reset for next one copy(data, orig) _, err = NewBitmap().ReadFrom(bytes.NewReader(data)) assert.Error(t, err) } } func TestBitmapFromBufferCOW(t *testing.T) { rbbogus := NewBitmap() rbbogus.Add(100) rbbogus.Add(100000) rb1 := NewBitmap() rb1.Add(1) buf1 := &bytes.Buffer{} rb1.WriteTo(buf1) rb2 := NewBitmap() rb2.Add(1000000) buf2 := &bytes.Buffer{} rb2.WriteTo(buf2) newRb1 := NewBitmap() newRb1.FromBuffer(buf1.Bytes()) newRb2 := NewBitmap() newRb2.FromBuffer(buf2.Bytes()) rbor1 := Or(newRb1, newRb2) rbor2 := rbor1 rbor3 := Or(newRb1, newRb2) rbor1.CloneCopyOnWriteContainers() rbor2.CloneCopyOnWriteContainers() rbor3.CloneCopyOnWriteContainers() buf1.Reset() buf2.Reset() rbbogus.WriteTo(buf1) rbbogus.WriteTo(buf2) rbexpected := NewBitmap() rbexpected.Add(1) rbexpected.Add(1000000) assert.True(t, rbexpected.Equals(rbor2)) assert.True(t, rbexpected.Equals(rbor3)) } func TestHoldReference(t *testing.T) { t.Run("Test Hold Reference", func(t *testing.T) { rb := New() buf := &bytes.Buffer{} for i := uint32(0); i < 650; i++ { rb.Add(i) } _, err := rb.WriteTo(buf) assert.NoError(t, err) nb := New() data := buf.Bytes() _, err = nb.ReadFrom(bytes.NewReader(data)) assert.NoError(t, err) buf = nil rb = nil data = nil runtime.GC() iterator := nb.Iterator() i := uint32(0) for iterator.HasNext() { v := iterator.Next() if v != i { return } assert.Equal(t, i, v) i++ } }) } func BenchmarkUnserializeReadFrom(b *testing.B) { for _, size := range 
[]uint32{650, 6500, 65000, 650000, 6500000} { rb := New() buf := &bytes.Buffer{} for i := uint32(0); i < size; i++ { rb.Add(i) } _, err := rb.WriteTo(buf) if err != nil { b.Fatalf("Unexpected error occurs: %v", err) } b.Run(fmt.Sprintf("ReadFrom-%d", size), func(b *testing.B) { b.ReportAllocs() b.StartTimer() for n := 0; n < b.N; n++ { reader := bytes.NewReader(buf.Bytes()) nb := New() if _, err := nb.ReadFrom(reader); err != nil { b.Fatalf("Unexpected error occurs: %v", err) } } b.StopTimer() }) } } func BenchmarkUnserializeFromBuffer(b *testing.B) { for _, size := range []uint32{650, 6500, 65000, 650000, 6500000} { rb := New() buf := &bytes.Buffer{} for i := uint32(0); i < size; i++ { rb.Add(i) } _, err := rb.WriteTo(buf) if err != nil { b.Fatalf("Unexpected error occurs: %v", err) } b.Run(fmt.Sprintf("FromBuffer-%d", size), func(b *testing.B) { b.ReportAllocs() b.StartTimer() for n := 0; n < b.N; n++ { nb := New() if _, err := nb.FromBuffer(buf.Bytes()); err != nil { b.Fatalf("Unexpected error occurs: %v", err) } } b.StopTimer() }) } } roaring-0.4.21/serializationfuzz.go 0000664 0000000 0000000 00000000551 13542657257 0017403 0 ustar 00root root 0000000 0000000 // +build gofuzz package roaring import "bytes" func FuzzSerializationStream(data []byte) int { newrb := NewBitmap() if _, err := newrb.ReadFrom(bytes.NewReader(data)); err != nil { return 0 } return 1 } func FuzzSerializationBuffer(data []byte) int { newrb := NewBitmap() if _, err := newrb.FromBuffer(data); err != nil { return 0 } return 1 } roaring-0.4.21/setutil.go 0000664 0000000 0000000 00000022220 13542657257 0015275 0 ustar 00root root 0000000 0000000 package roaring func equal(a, b []uint16) bool { if len(a) != len(b) { return false } for i := range a { if a[i] != b[i] { return false } } return true } func difference(set1 []uint16, set2 []uint16, buffer []uint16) int { if 0 == len(set2) { for k := 0; k < len(set1); k++ { buffer[k] = set1[k] } return len(set1) } if 0 == len(set1) { return 0 } pos 
:= 0 k1 := 0 k2 := 0 buffer = buffer[:cap(buffer)] s1 := set1[k1] s2 := set2[k2] for { if s1 < s2 { buffer[pos] = s1 pos++ k1++ if k1 >= len(set1) { break } s1 = set1[k1] } else if s1 == s2 { k1++ k2++ if k1 >= len(set1) { break } s1 = set1[k1] if k2 >= len(set2) { for ; k1 < len(set1); k1++ { buffer[pos] = set1[k1] pos++ } break } s2 = set2[k2] } else { // if (val1>val2) k2++ if k2 >= len(set2) { for ; k1 < len(set1); k1++ { buffer[pos] = set1[k1] pos++ } break } s2 = set2[k2] } } return pos } func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int { if 0 == len(set2) { buffer = buffer[:len(set1)] copy(buffer, set1[:]) return len(set1) } if 0 == len(set1) { buffer = buffer[:len(set2)] copy(buffer, set2[:]) return len(set2) } pos := 0 k1 := 0 k2 := 0 s1 := set1[k1] s2 := set2[k2] buffer = buffer[:cap(buffer)] for { if s1 < s2 { buffer[pos] = s1 pos++ k1++ if k1 >= len(set1) { for ; k2 < len(set2); k2++ { buffer[pos] = set2[k2] pos++ } break } s1 = set1[k1] } else if s1 == s2 { k1++ k2++ if k1 >= len(set1) { for ; k2 < len(set2); k2++ { buffer[pos] = set2[k2] pos++ } break } if k2 >= len(set2) { for ; k1 < len(set1); k1++ { buffer[pos] = set1[k1] pos++ } break } s1 = set1[k1] s2 = set2[k2] } else { // if (val1>val2) buffer[pos] = s2 pos++ k2++ if k2 >= len(set2) { for ; k1 < len(set1); k1++ { buffer[pos] = set1[k1] pos++ } break } s2 = set2[k2] } } return pos } func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) int { pos := 0 k1 := 0 k2 := 0 if 0 == len(set2) { buffer = buffer[:len(set1)] copy(buffer, set1[:]) return len(set1) } if 0 == len(set1) { buffer = buffer[:len(set2)] copy(buffer, set2[:]) return len(set2) } s1 := set1[k1] s2 := set2[k2] buffer = buffer[:cap(buffer)] for { if s1 < s2 { buffer[pos] = s1 pos++ k1++ if k1 >= len(set1) { copy(buffer[pos:], set2[k2:]) pos += len(set2) - k2 break } s1 = set1[k1] } else if s1 == s2 { buffer[pos] = s1 pos++ k1++ k2++ if k1 >= len(set1) { copy(buffer[pos:], set2[k2:]) pos += len(set2) - 
k2 break } if k2 >= len(set2) { copy(buffer[pos:], set1[k1:]) pos += len(set1) - k1 break } s1 = set1[k1] s2 = set2[k2] } else { // if (set1[k1]>set2[k2]) buffer[pos] = s2 pos++ k2++ if k2 >= len(set2) { copy(buffer[pos:], set1[k1:]) pos += len(set1) - k1 break } s2 = set2[k2] } } return pos } func union2by2Cardinality(set1 []uint16, set2 []uint16) int { pos := 0 k1 := 0 k2 := 0 if 0 == len(set2) { return len(set1) } if 0 == len(set1) { return len(set2) } s1 := set1[k1] s2 := set2[k2] for { if s1 < s2 { pos++ k1++ if k1 >= len(set1) { pos += len(set2) - k2 break } s1 = set1[k1] } else if s1 == s2 { pos++ k1++ k2++ if k1 >= len(set1) { pos += len(set2) - k2 break } if k2 >= len(set2) { pos += len(set1) - k1 break } s1 = set1[k1] s2 = set2[k2] } else { // if (set1[k1]>set2[k2]) pos++ k2++ if k2 >= len(set2) { pos += len(set1) - k1 break } s2 = set2[k2] } } return pos } func intersection2by2( set1 []uint16, set2 []uint16, buffer []uint16) int { if len(set1)*64 < len(set2) { return onesidedgallopingintersect2by2(set1, set2, buffer) } else if len(set2)*64 < len(set1) { return onesidedgallopingintersect2by2(set2, set1, buffer) } else { return localintersect2by2(set1, set2, buffer) } } func intersection2by2Cardinality( set1 []uint16, set2 []uint16) int { if len(set1)*64 < len(set2) { return onesidedgallopingintersect2by2Cardinality(set1, set2) } else if len(set2)*64 < len(set1) { return onesidedgallopingintersect2by2Cardinality(set2, set1) } else { return localintersect2by2Cardinality(set1, set2) } } func intersects2by2( set1 []uint16, set2 []uint16) bool { // could be optimized if one set is much larger than the other one if (0 == len(set1)) || (0 == len(set2)) { return false } k1 := 0 k2 := 0 s1 := set1[k1] s2 := set2[k2] mainwhile: for { if s2 < s1 { for { k2++ if k2 == len(set2) { break mainwhile } s2 = set2[k2] if s2 >= s1 { break } } } if s1 < s2 { for { k1++ if k1 == len(set1) { break mainwhile } s1 = set1[k1] if s1 >= s2 { break } } } else { // (set2[k2] == 
set1[k1]) return true } } return false } func localintersect2by2( set1 []uint16, set2 []uint16, buffer []uint16) int { if (0 == len(set1)) || (0 == len(set2)) { return 0 } k1 := 0 k2 := 0 pos := 0 buffer = buffer[:cap(buffer)] s1 := set1[k1] s2 := set2[k2] mainwhile: for { if s2 < s1 { for { k2++ if k2 == len(set2) { break mainwhile } s2 = set2[k2] if s2 >= s1 { break } } } if s1 < s2 { for { k1++ if k1 == len(set1) { break mainwhile } s1 = set1[k1] if s1 >= s2 { break } } } else { // (set2[k2] == set1[k1]) buffer[pos] = s1 pos++ k1++ if k1 == len(set1) { break } s1 = set1[k1] k2++ if k2 == len(set2) { break } s2 = set2[k2] } } return pos } func localintersect2by2Cardinality( set1 []uint16, set2 []uint16) int { if (0 == len(set1)) || (0 == len(set2)) { return 0 } k1 := 0 k2 := 0 pos := 0 s1 := set1[k1] s2 := set2[k2] mainwhile: for { if s2 < s1 { for { k2++ if k2 == len(set2) { break mainwhile } s2 = set2[k2] if s2 >= s1 { break } } } if s1 < s2 { for { k1++ if k1 == len(set1) { break mainwhile } s1 = set1[k1] if s1 >= s2 { break } } } else { // (set2[k2] == set1[k1]) pos++ k1++ if k1 == len(set1) { break } s1 = set1[k1] k2++ if k2 == len(set2) { break } s2 = set2[k2] } } return pos } func advanceUntil( array []uint16, pos int, length int, min uint16) int { lower := pos + 1 if lower >= length || array[lower] >= min { return lower } spansize := 1 for lower+spansize < length && array[lower+spansize] < min { spansize *= 2 } var upper int if lower+spansize < length { upper = lower + spansize } else { upper = length - 1 } if array[upper] == min { return upper } if array[upper] < min { // means // array // has no // item // >= min // pos = array.length; return length } // we know that the next-smallest span was too small lower += (spansize >> 1) mid := 0 for lower+1 != upper { mid = (lower + upper) >> 1 if array[mid] == min { return mid } else if array[mid] < min { lower = mid } else { upper = mid } } return upper } func onesidedgallopingintersect2by2( smallset []uint16, 
largeset []uint16, buffer []uint16) int { if 0 == len(smallset) { return 0 } buffer = buffer[:cap(buffer)] k1 := 0 k2 := 0 pos := 0 s1 := largeset[k1] s2 := smallset[k2] mainwhile: for { if s1 < s2 { k1 = advanceUntil(largeset, k1, len(largeset), s2) if k1 == len(largeset) { break mainwhile } s1 = largeset[k1] } if s2 < s1 { k2++ if k2 == len(smallset) { break mainwhile } s2 = smallset[k2] } else { buffer[pos] = s2 pos++ k2++ if k2 == len(smallset) { break } s2 = smallset[k2] k1 = advanceUntil(largeset, k1, len(largeset), s2) if k1 == len(largeset) { break mainwhile } s1 = largeset[k1] } } return pos } func onesidedgallopingintersect2by2Cardinality( smallset []uint16, largeset []uint16) int { if 0 == len(smallset) { return 0 } k1 := 0 k2 := 0 pos := 0 s1 := largeset[k1] s2 := smallset[k2] mainwhile: for { if s1 < s2 { k1 = advanceUntil(largeset, k1, len(largeset), s2) if k1 == len(largeset) { break mainwhile } s1 = largeset[k1] } if s2 < s1 { k2++ if k2 == len(smallset) { break mainwhile } s2 = smallset[k2] } else { pos++ k2++ if k2 == len(smallset) { break } s2 = smallset[k2] k1 = advanceUntil(largeset, k1, len(largeset), s2) if k1 == len(largeset) { break mainwhile } s1 = largeset[k1] } } return pos } func binarySearch(array []uint16, ikey uint16) int { low := 0 high := len(array) - 1 for low+16 <= high { middleIndex := int(uint32(low+high) >> 1) middleValue := array[middleIndex] if middleValue < ikey { low = middleIndex + 1 } else if middleValue > ikey { high = middleIndex - 1 } else { return middleIndex } } for ; low <= high; low++ { val := array[low] if val >= ikey { if val == ikey { return low } break } } return -(low + 1) } roaring-0.4.21/setutil_test.go 0000664 0000000 0000000 00000005771 13542657257 0016350 0 ustar 00root root 0000000 0000000 package roaring // to run just these tests: go test -run TestSetUtil* import ( "github.com/stretchr/testify/assert" "testing" ) func TestSetUtilDifference(t *testing.T) { data1 := []uint16{0, 1, 2, 3, 4, 9} data2 := 
[]uint16{2, 3, 4, 5, 8, 9, 11} result := make([]uint16, 0, len(data1)+len(data2)) expectedresult := []uint16{0, 1} nl := difference(data1, data2, result) result = result[:nl] assert.Equal(t, expectedresult, result) expectedresult = []uint16{5, 8, 11} nl = difference(data2, data1, result) result = result[:nl] assert.Equal(t, expectedresult, result) } func TestSetUtilUnion(t *testing.T) { data1 := []uint16{0, 1, 2, 3, 4, 9} data2 := []uint16{2, 3, 4, 5, 8, 9, 11} result := make([]uint16, 0, len(data1)+len(data2)) expectedresult := []uint16{0, 1, 2, 3, 4, 5, 8, 9, 11} nl := union2by2(data1, data2, result) result = result[:nl] assert.Equal(t, expectedresult, result) nl = union2by2(data2, data1, result) result = result[:nl] assert.Equal(t, expectedresult, result) } func TestSetUtilExclusiveUnion(t *testing.T) { data1 := []uint16{0, 1, 2, 3, 4, 9} data2 := []uint16{2, 3, 4, 5, 8, 9, 11} result := make([]uint16, 0, len(data1)+len(data2)) expectedresult := []uint16{0, 1, 5, 8, 11} nl := exclusiveUnion2by2(data1, data2, result) result = result[:nl] assert.Equal(t, expectedresult, result) nl = exclusiveUnion2by2(data2, data1, result) result = result[:nl] assert.Equal(t, expectedresult, result) } func TestSetUtilIntersection(t *testing.T) { data1 := []uint16{0, 1, 2, 3, 4, 9} data2 := []uint16{2, 3, 4, 5, 8, 9, 11} result := make([]uint16, 0, len(data1)+len(data2)) expectedresult := []uint16{2, 3, 4, 9} nl := intersection2by2(data1, data2, result) result = result[:nl] result = result[:len(expectedresult)] assert.Equal(t, expectedresult, result) nl = intersection2by2(data2, data1, result) result = result[:nl] assert.Equal(t, expectedresult, result) data1 = []uint16{4} data2 = make([]uint16, 10000) for i := range data2 { data2[i] = uint16(i) } result = make([]uint16, 0, len(data1)+len(data2)) expectedresult = data1 nl = intersection2by2(data1, data2, result) result = result[:nl] assert.Equal(t, expectedresult, result) nl = intersection2by2(data2, data1, result) result = 
result[:nl] assert.Equal(t, expectedresult, result) } func TestSetUtilIntersection2(t *testing.T) { data1 := []uint16{0, 2, 4, 6, 8, 10, 12, 14, 16, 18} data2 := []uint16{0, 3, 6, 9, 12, 15, 18} result := make([]uint16, 0, len(data1)+len(data2)) expectedresult := []uint16{0, 6, 12, 18} nl := intersection2by2(data1, data2, result) result = result[:nl] result = result[:len(expectedresult)] assert.Equal(t, expectedresult, result) } func TestSetUtilBinarySearch(t *testing.T) { data := make([]uint16, 256) for i := range data { data[i] = uint16(2 * i) } for i := 0; i < 2*len(data); i++ { key := uint16(i) loc := binarySearch(data, key) if (key & 1) == 0 { assert.Equal(t, int(key)/2, loc) } else { assert.Equal(t, -int(key)/2-2, loc) } } } roaring-0.4.21/shortiterator.go 0000664 0000000 0000000 00000001541 13542657257 0016520 0 ustar 00root root 0000000 0000000 package roaring type shortIterable interface { hasNext() bool next() uint16 } type shortPeekable interface { shortIterable peekNext() uint16 advanceIfNeeded(minval uint16) } type shortIterator struct { slice []uint16 loc int } func (si *shortIterator) hasNext() bool { return si.loc < len(si.slice) } func (si *shortIterator) next() uint16 { a := si.slice[si.loc] si.loc++ return a } func (si *shortIterator) peekNext() uint16 { return si.slice[si.loc] } func (si *shortIterator) advanceIfNeeded(minval uint16) { if si.hasNext() && si.peekNext() < minval { si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval) } } type reverseIterator struct { slice []uint16 loc int } func (si *reverseIterator) hasNext() bool { return si.loc >= 0 } func (si *reverseIterator) next() uint16 { a := si.slice[si.loc] si.loc-- return a } roaring-0.4.21/smat.go 0000664 0000000 0000000 00000024522 13542657257 0014557 0 ustar 00root root 0000000 0000000 // +build gofuzz /* # Instructions for smat testing for roaring [smat](https://github.com/mschoch/smat) is a framework that provides state machine assisted fuzz testing. 
To run the smat tests for roaring... ## Prerequisites $ go get github.com/dvyukov/go-fuzz/go-fuzz $ go get github.com/dvyukov/go-fuzz/go-fuzz-build ## Steps 1. Generate initial smat corpus: ``` go test -tags=gofuzz -run=TestGenerateSmatCorpus ``` 2. Build go-fuzz test program with instrumentation: ``` go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring ``` 3. Run go-fuzz: ``` go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 ``` You should see output like... ``` 2016/09/16 13:58:35 slaves: 8, corpus: 1 (3s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 3s 2016/09/16 13:58:38 slaves: 8, corpus: 1 (6s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 6s 2016/09/16 13:58:41 slaves: 8, corpus: 1 (9s ago), crashers: 0, restarts: 1/44, execs: 44 (5/sec), cover: 0, uptime: 9s 2016/09/16 13:58:44 slaves: 8, corpus: 1 (12s ago), crashers: 0, restarts: 1/45, execs: 45 (4/sec), cover: 0, uptime: 12s 2016/09/16 13:58:47 slaves: 8, corpus: 1 (15s ago), crashers: 0, restarts: 1/46, execs: 46 (3/sec), cover: 0, uptime: 15s 2016/09/16 13:58:50 slaves: 8, corpus: 1 (18s ago), crashers: 0, restarts: 1/47, execs: 47 (3/sec), cover: 0, uptime: 18s 2016/09/16 13:58:53 slaves: 8, corpus: 1 (21s ago), crashers: 0, restarts: 1/63, execs: 63 (3/sec), cover: 0, uptime: 21s 2016/09/16 13:58:56 slaves: 8, corpus: 1 (24s ago), crashers: 0, restarts: 1/65, execs: 65 (3/sec), cover: 0, uptime: 24s 2016/09/16 13:58:59 slaves: 8, corpus: 1 (27s ago), crashers: 0, restarts: 1/66, execs: 66 (2/sec), cover: 0, uptime: 27s 2016/09/16 13:59:02 slaves: 8, corpus: 1 (30s ago), crashers: 0, restarts: 1/67, execs: 67 (2/sec), cover: 0, uptime: 30s 2016/09/16 13:59:05 slaves: 8, corpus: 1 (33s ago), crashers: 0, restarts: 1/83, execs: 83 (3/sec), cover: 0, uptime: 33s 2016/09/16 13:59:08 slaves: 8, corpus: 1 (36s ago), crashers: 0, restarts: 1/84, execs: 84 (2/sec), cover: 0, uptime: 36s 2016/09/16 13:59:11 slaves: 8, corpus: 2 (0s ago), 
crashers: 0, restarts: 1/85, execs: 85 (2/sec), cover: 0, uptime: 39s 2016/09/16 13:59:14 slaves: 8, corpus: 17 (2s ago), crashers: 0, restarts: 1/86, execs: 86 (2/sec), cover: 480, uptime: 42s 2016/09/16 13:59:17 slaves: 8, corpus: 17 (5s ago), crashers: 0, restarts: 1/66, execs: 132 (3/sec), cover: 487, uptime: 45s 2016/09/16 13:59:20 slaves: 8, corpus: 17 (8s ago), crashers: 0, restarts: 1/440, execs: 2645 (55/sec), cover: 487, uptime: 48s ``` Let it run, and if the # of crashers is > 0, check out the reports in the workdir where you should be able to find the panic goroutine stack traces. */ package roaring import ( "fmt" "sort" "github.com/mschoch/smat" "github.com/willf/bitset" ) // fuzz test using state machine driven by byte stream. func FuzzSmat(data []byte) int { return smat.Fuzz(&smatContext{}, smat.ActionID('S'), smat.ActionID('T'), smatActionMap, data) } var smatDebug = false func smatLog(prefix, format string, args ...interface{}) { if smatDebug { fmt.Print(prefix) fmt.Printf(format, args...) } } type smatContext struct { pairs []*smatPair // Two registers, x & y. 
x int y int actions int } type smatPair struct { bm *Bitmap bs *bitset.BitSet } // ------------------------------------------------------------------ var smatActionMap = smat.ActionMap{ smat.ActionID('X'): smatAction("x++", smatWrap(func(c *smatContext) { c.x++ })), smat.ActionID('x'): smatAction("x--", smatWrap(func(c *smatContext) { c.x-- })), smat.ActionID('Y'): smatAction("y++", smatWrap(func(c *smatContext) { c.y++ })), smat.ActionID('y'): smatAction("y--", smatWrap(func(c *smatContext) { c.y-- })), smat.ActionID('*'): smatAction("x*y", smatWrap(func(c *smatContext) { c.x = c.x * c.y })), smat.ActionID('<'): smatAction("x<<", smatWrap(func(c *smatContext) { c.x = c.x << 1 })), smat.ActionID('^'): smatAction("swap", smatWrap(func(c *smatContext) { c.x, c.y = c.y, c.x })), smat.ActionID('['): smatAction(" pushPair", smatWrap(smatPushPair)), smat.ActionID(']'): smatAction(" popPair", smatWrap(smatPopPair)), smat.ActionID('B'): smatAction(" setBit", smatWrap(smatSetBit)), smat.ActionID('b'): smatAction(" removeBit", smatWrap(smatRemoveBit)), smat.ActionID('o'): smatAction(" or", smatWrap(smatOr)), smat.ActionID('a'): smatAction(" and", smatWrap(smatAnd)), smat.ActionID('#'): smatAction(" cardinality", smatWrap(smatCardinality)), smat.ActionID('O'): smatAction(" orCardinality", smatWrap(smatOrCardinality)), smat.ActionID('A'): smatAction(" andCardinality", smatWrap(smatAndCardinality)), smat.ActionID('c'): smatAction(" clear", smatWrap(smatClear)), smat.ActionID('r'): smatAction(" runOptimize", smatWrap(smatRunOptimize)), smat.ActionID('e'): smatAction(" isEmpty", smatWrap(smatIsEmpty)), smat.ActionID('i'): smatAction(" intersects", smatWrap(smatIntersects)), smat.ActionID('f'): smatAction(" flip", smatWrap(smatFlip)), smat.ActionID('-'): smatAction(" difference", smatWrap(smatDifference)), } var smatRunningPercentActions []smat.PercentAction func init() { var ids []int for actionId := range smatActionMap { ids = append(ids, int(actionId)) } sort.Ints(ids) pct := 
100 / len(smatActionMap) for _, actionId := range ids { smatRunningPercentActions = append(smatRunningPercentActions, smat.PercentAction{pct, smat.ActionID(actionId)}) } smatActionMap[smat.ActionID('S')] = smatAction("SETUP", smatSetupFunc) smatActionMap[smat.ActionID('T')] = smatAction("TEARDOWN", smatTeardownFunc) } // We only have one smat state: running. func smatRunning(next byte) smat.ActionID { return smat.PercentExecute(next, smatRunningPercentActions...) } func smatAction(name string, f func(ctx smat.Context) (smat.State, error)) func(smat.Context) (smat.State, error) { return func(ctx smat.Context) (smat.State, error) { c := ctx.(*smatContext) c.actions++ smatLog(" ", "%s\n", name) return f(ctx) } } // Creates an smat action func based on a simple callback. func smatWrap(cb func(c *smatContext)) func(smat.Context) (next smat.State, err error) { return func(ctx smat.Context) (next smat.State, err error) { c := ctx.(*smatContext) cb(c) return smatRunning, nil } } // Invokes a callback function with the input v bounded to len(c.pairs). 
func (c *smatContext) withPair(v int, cb func(*smatPair)) { if len(c.pairs) > 0 { if v < 0 { v = -v } v = v % len(c.pairs) cb(c.pairs[v]) } } // ------------------------------------------------------------------ func smatSetupFunc(ctx smat.Context) (next smat.State, err error) { return smatRunning, nil } func smatTeardownFunc(ctx smat.Context) (next smat.State, err error) { return nil, err } // ------------------------------------------------------------------ func smatPushPair(c *smatContext) { c.pairs = append(c.pairs, &smatPair{ bm: NewBitmap(), bs: bitset.New(100), }) } func smatPopPair(c *smatContext) { if len(c.pairs) > 0 { c.pairs = c.pairs[0 : len(c.pairs)-1] } } func smatSetBit(c *smatContext) { c.withPair(c.x, func(p *smatPair) { y := uint32(c.y) p.bm.AddInt(int(y)) p.bs.Set(uint(y)) p.checkEquals() }) } func smatRemoveBit(c *smatContext) { c.withPair(c.x, func(p *smatPair) { y := uint32(c.y) p.bm.Remove(y) p.bs.Clear(uint(y)) p.checkEquals() }) } func smatAnd(c *smatContext) { c.withPair(c.x, func(px *smatPair) { c.withPair(c.y, func(py *smatPair) { px.bm.And(py.bm) px.bs = px.bs.Intersection(py.bs) px.checkEquals() py.checkEquals() }) }) } func smatOr(c *smatContext) { c.withPair(c.x, func(px *smatPair) { c.withPair(c.y, func(py *smatPair) { px.bm.Or(py.bm) px.bs = px.bs.Union(py.bs) px.checkEquals() py.checkEquals() }) }) } func smatAndCardinality(c *smatContext) { c.withPair(c.x, func(px *smatPair) { c.withPair(c.y, func(py *smatPair) { c0 := px.bm.AndCardinality(py.bm) c1 := px.bs.IntersectionCardinality(py.bs) if c0 != uint64(c1) { panic("expected same add cardinality") } px.checkEquals() py.checkEquals() }) }) } func smatOrCardinality(c *smatContext) { c.withPair(c.x, func(px *smatPair) { c.withPair(c.y, func(py *smatPair) { c0 := px.bm.OrCardinality(py.bm) c1 := px.bs.UnionCardinality(py.bs) if c0 != uint64(c1) { panic("expected same or cardinality") } px.checkEquals() py.checkEquals() }) }) } func smatRunOptimize(c *smatContext) { c.withPair(c.x, 
func(px *smatPair) { px.bm.RunOptimize() px.checkEquals() }) } func smatClear(c *smatContext) { c.withPair(c.x, func(px *smatPair) { px.bm.Clear() px.bs = px.bs.ClearAll() px.checkEquals() }) } func smatCardinality(c *smatContext) { c.withPair(c.x, func(px *smatPair) { c0 := px.bm.GetCardinality() c1 := px.bs.Count() if c0 != uint64(c1) { panic("expected same cardinality") } }) } func smatIsEmpty(c *smatContext) { c.withPair(c.x, func(px *smatPair) { c0 := px.bm.IsEmpty() c1 := px.bs.None() if c0 != c1 { panic("expected same is empty") } }) } func smatIntersects(c *smatContext) { c.withPair(c.x, func(px *smatPair) { c.withPair(c.y, func(py *smatPair) { v0 := px.bm.Intersects(py.bm) v1 := px.bs.IntersectionCardinality(py.bs) > 0 if v0 != v1 { panic("intersects not equal") } px.checkEquals() py.checkEquals() }) }) } func smatFlip(c *smatContext) { c.withPair(c.x, func(p *smatPair) { y := uint32(c.y) p.bm.Flip(uint64(y), uint64(y)+1) p.bs = p.bs.Flip(uint(y)) p.checkEquals() }) } func smatDifference(c *smatContext) { c.withPair(c.x, func(px *smatPair) { c.withPair(c.y, func(py *smatPair) { px.bm.AndNot(py.bm) px.bs = px.bs.Difference(py.bs) px.checkEquals() py.checkEquals() }) }) } func (p *smatPair) checkEquals() { if !p.equalsBitSet(p.bs, p.bm) { panic("bitset mismatch") } } func (p *smatPair) equalsBitSet(a *bitset.BitSet, b *Bitmap) bool { for i, e := a.NextSet(0); e; i, e = a.NextSet(i + 1) { if !b.ContainsInt(int(i)) { fmt.Printf("in a bitset, not b bitmap, i: %d\n", i) fmt.Printf(" a bitset: %s\n b bitmap: %s\n", a.String(), b.String()) return false } } i := b.Iterator() for i.HasNext() { v := i.Next() if !a.Test(uint(v)) { fmt.Printf("in b bitmap, not a bitset, v: %d\n", v) fmt.Printf(" a bitset: %s\n b bitmap: %s\n", a.String(), b.String()) return false } } return true } roaring-0.4.21/smat_generate_test.go 0000664 0000000 0000000 00000002201 13542657257 0017456 0 ustar 00root root 0000000 0000000 // +build gofuzz package roaring import ( "fmt" "io/ioutil" 
"os" "testing" "github.com/mschoch/smat" ) func TestGenerateSmatCorpus(t *testing.T) { for i, actionSeq := range smatActionSeqs { byteSequence, err := actionSeq.ByteEncoding(&smatContext{}, smat.ActionID('S'), smat.ActionID('T'), smatActionMap) if err != nil { t.Fatalf("error from ByteEncoding, err: %v, i: %d, actonSeq: %#v", err, i, actionSeq) } os.MkdirAll("workdir/corpus", 0700) ioutil.WriteFile(fmt.Sprintf("workdir/corpus/%d", i), byteSequence, 0600) } } var smatActionSeqs = []smat.ActionSeq{ { smat.ActionID('X'), smat.ActionID('X'), smat.ActionID('Y'), smat.ActionID('Y'), smat.ActionID('<'), smat.ActionID('<'), smat.ActionID('*'), smat.ActionID('x'), smat.ActionID('y'), smat.ActionID('*'), smat.ActionID('['), smat.ActionID('['), smat.ActionID('B'), smat.ActionID('a'), smat.ActionID('o'), smat.ActionID('A'), smat.ActionID('O'), smat.ActionID('#'), smat.ActionID('X'), smat.ActionID('Y'), smat.ActionID('B'), smat.ActionID('e'), smat.ActionID('f'), smat.ActionID('-'), smat.ActionID('e'), }, } roaring-0.4.21/smat_hits_test.go 0000664 0000000 0000000 00000003020 13542657257 0016633 0 ustar 00root root 0000000 0000000 // Copyright (c) 2016 Couchbase, Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the // License. You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an "AS // IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either // express or implied. See the License for the specific language // governing permissions and limitations under the License. // +build gofuzz package roaring import ( "log" "testing" "github.com/mschoch/smat" ) // Crashers reported by smat, captured as pairs of strings. A pair is // a short descrption of the crash then the corresponding crash-input. 
var smatHits = []string{ "0001:\n" + "in a bitset, not b bitmap, pos: 0\n" + " a bitset: {0,1}\n" + " b bitmap: {1,0}\n" + "panic: bitset mismatch\n" + " SETUP\n" + " pushPair\n" + " setBit\n" + " y++\n" + " flip\n", "]5S\xa5", } // Test the previous issues found by smat. func TestSmatHits(t *testing.T) { smatDebugPrev := smatDebug smatDebug = true // Use true when diagnosing a crash. for i := 0; i < len(smatHits); i += 2 { desc := smatHits[i] hit := []byte(smatHits[i+1]) log.Printf("testing smat hit: (%d) %s\n", i/2, desc) // fuzz the hit input smat.Fuzz(&smatContext{}, smat.ActionID('S'), smat.ActionID('T'), smatActionMap, hit) } smatDebug = smatDebugPrev } roaring-0.4.21/testdata/ 0000775 0000000 0000000 00000000000 13542657257 0015070 5 ustar 00root root 0000000 0000000 roaring-0.4.21/testdata/bitmapwithoutruns.bin 0000664 0000000 0000000 00000215650 13542657257 0021403 0 ustar 00root root 0000000 0000000 :0 A ! $ TU UU TU TU ? Q 4` ( (! (A (a ( ( pX@(#'*.26:>hBPF8J NRUY]aexi`mHq0uy }ЄpX@(ȯhP8 ڨސx`H0 pX@(!$(,048h