pax_global_header00006660000000000000000000000064122404356700014515gustar00rootroot0000000000000052 comment=19ea785bb7d5d37eaaf39a52bda3a3f32385577b javaewah-JavaEWAH-0.7.9/000077500000000000000000000000001224043567000146445ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/.gitignore000066400000000000000000000000601224043567000166300ustar00rootroot00000000000000out/ target/ # IDEA project files .idea/ *.iml javaewah-JavaEWAH-0.7.9/.travis.yml000066400000000000000000000001371224043567000167560ustar00rootroot00000000000000language: java jdk: - oraclejdk7 - openjdk7 - openjdk6 install: true script: mvn test javaewah-JavaEWAH-0.7.9/CHANGELOG000066400000000000000000000160131224043567000160570ustar00rootroot00000000000000version 0.7.9 (November 12th 2013) - Spelled out the license in the source code to avoid any confusion version 0.7.8 (October 1st 2013) - Resolved an issue with addStream... methods. version 0.7.7 (October 1st 2013) - Resolved an issue with 32-bit EWAH caused by a partial update in version 0.7.6. version 0.7.6 (September 30th 2013) - Fixed a memory leak version 0.7.5 (September 7th 2013) - Fixed recently introduced "Get" method. Better unit testing. version 0.7.4 (September 7th 2013) - Added a convenience method that allows you to query for the value of a bit (based on code by @zhenjl) version 0.7.3 (August 8th 2013) - Fixed bug that could sometimes arise with the new iterator-based processing (O. Kaser) version 0.7.2 (July 25th 2013) - Clarifying the difference between setSizeInBits(int) and setSizeInBits(int,boolean). Some code that worked previously (but was potentially unsafe) might throw an exception when using setSizeInBits(int). The fix is to call setSizeInBits(int,false) which will be equivalent, but safer. version 0.7.1 (July 16th 2013) - From now on, we require java 6 or better: in practice this means that we are starting to use annotations such as @Override. 
- We revert to the pre-0.7 behavior by setting the "sizeInBits" of the bitmaps according to the max of the input bitmaps (issue 27). version 0.7.0 (July 16th 2013) - Performance improvements when aggregating several bitmaps - Introduction of an iterator-based approach version 0.6.12 (May 21st 2013) - Fixed bug with addStreamOfNegatedDirtyWords (Vicent Marti) - Removed some unnecessary recursion version 0.6.11 (March 29, 2013) - fixed issue 26: Inconsistent iterators after using "setSizeInBits" version 0.6.10 (March 14, 2013) - replaced private methods by renamed public methods to enhance extensibility - fixed bug in and-aggregation (Gabriel Magniez) version 0.6.9 (March 8, 2013) - Using maven-bundle-plugin (for JGIT, as per Alexander Riss's request) - Minor: Fixed a typo in example.java version 0.6.8 (February 21, 2013) - Fixed an issue with hasNext in iterator (issue 22). - Added corresponding unit test. version 0.6.7 (February 15, 2013) - Fixed an issue with setSizeInBits (issue 21). - Added corresponding unit test. version 0.6.6 (December 12, 2012) - Fixed an off-by-one bug in setSizeInBits(final int size, final boolean defaultvalue). - Added corresponding unit test. version 0.6.5 (November 26, 2012) - Fixed a bug in IteratingBufferedRunningLengthWord.discharge that might cause "and" to return spurious values in some cases (issue #19 reported by Gregory Ssi-Yan-Kai) - added a unit test so that this sort of bug will be caught in the future version 0.6.4 (November 16, 2012) - Fixed a bug in IteratingBufferedRunningLengthWord.discardFirstWords that might cause "and" to return the empty set when there is actually a result (issue #18 reported by Bilal Tayara) - added a unit test so that this sort of bug will be caught in the future version 0.6.3 (October 31, 2012) - Fixed a design issue in NonEmptyVirtualStorage32 and NonEmptyVirtualStorage that might cause "equals" to give the wrong answer. 
version 0.6.2 (October 19, 2012) - Cleaned up "andNot" and "xor" so that the implementation is simpler - Minor refactoring version 0.6.1 (October 18, 2012) - Optimized the logical and operation and rewrote the logical or operation - Sometimes we used the term "literal" and sometimes "dirty". We now use only the term "literal". version 0.6.0 (October 15, 2012) - Moved packages from javaewah to com.googlecode.javaewah (request by Eclipse) version 0.5.6 (October 5, 2012) - Improve performance of the IntIterator implementation (Colby Ranger) - Fixed copyright notice (Google Inc. via Colby Ranger) version 0.5.5 (October 5, 2012) - Fixed bug with new hashCode (from previous version), added unit test (Colby Ranger) - better example.java - new "bitmapOf" method (Colby Ranger) version 0.5.4 (October 5, 2012) - Changed the semantics of "equals" to be more consistent with BitSet from the Java API. (Colby Ranger) version 0.5.3 (August 7, 2012) - Correct a bug where the intersects method was returning false positives. The bug occurs when we attempt to add zero set bits to the stream of dirty words. Fixed / tested for 64 bit and 32 bit variants, also testing every flow through the and() method when called from intersects method. - Re-enable the testOrCardinality for the 64 bit variant. - Change behavior of NonEmptyException so we only throw the Exception if the number of dirty words is positive (and thus we are trying to set none). - Fix documentation in 32 bit impl. - remove unneeded returns in 64 bit implementation. version 0.5.2 (July 9, 2012) - Now being explicit about range of allowed set values, throws exceptions when out of range. version 0.5.1 (May 28, 2012) - Fixed minor performance regression. version 0.5.0 (May 23, 2012) - Added a 32-bit version of the compression scheme. version 0.4.5 (May 21, 2012) - Introduced the toArray method to retrieve set bits. version 0.4.4 (May 21, 2012) - Perf. boost when decoding bitmaps. 
version 0.4.3 (April 9, 2012) - fast aggregation through logical AND of many bitmaps using a new method - fixed a rarely occurring bug in the set method due to faulty bitmap size extension version 0.4.2 (April 5, 2012) * Fixed a bug with intersects method version 0.4.1 (Mar 20, 2012) * Resolved "EWAHCompressedBitmap.iterator() does not follow Java's contract for the Iterator interface" * some performance enhancements for multi or. array iteration is performing better than list iteration. * documentation update version 0.4.0 (Mar 5, 2012) Several optimizations and (minor) API changes by David McIntosh. We expect that the API is backward compatible, however serialization won't be compatible with previous versions. 1) slight tweak to serialization to use the actualsizeinwords to set the buffer size when deserializing so it doesn't allocate more memory than it really needs. 2) added an orCardinality method that is a shortcut for bitmap1.or(bitmap2).cardinality() but doesn't need to create an intermediate bitmap. 3) added static or and orCardinality methods that can handle N number of bitmaps. For example, bitmap1.or(bitmap2).or(bitmap3) could be done as EWAHCompressedBitmap.or(bitmap1, bitmap2, bitmap3). It can be slower when N is small but avoids having to create N-2 intermediate bitmaps. 4) Long.bitCount is much faster when counting bits in the cardinality() method. version 0.3.3 (Mar 2, 2012) Correcting bug in intersect method (Robert Becho). version 0.3.2 (Jan 16, 2012) Better documentation and introduction of a few methods (intersects). version 0.3.1 (Jan 2, 2012) The code is now built using maven. version 0.3 (Jun 21, 2011) The code was optimized slightly for speed. You may see a gain of about 20% on some operations, and no gain on other operations. version 0.2 (Mar 4, 2011) In this release, there is a minor change to the API: the getPositions method now returns an ArrayList instead of a Vector. version 0.1 : was never released publicly. 
javaewah-JavaEWAH-0.7.9/LICENSE000066400000000000000000000240411224043567000156520ustar00rootroot00000000000000Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: You must give any other recipients of the Work or Derivative Works a copy of this License; and You must cause any modified files to carry prominent notices stating that You changed the files; and You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; 
within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. javaewah-JavaEWAH-0.7.9/LICENSE-2.0.txt000066400000000000000000000261361224043567000167740ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. javaewah-JavaEWAH-0.7.9/README.md000066400000000000000000000066411224043567000161320ustar00rootroot00000000000000JavaEWAH ========================================================== (c) 2009-2013 Daniel Lemire (http://lemire.me/en/), Cliff Moon (https://github.com/cliffmoon), David McIntosh (https://github.com/mctofu), Robert Becho (https://github.com/RBecho), Colby Ranger (https://github.com/crangeratgoogle) Veronika Zenz (https://github.com/veronikazenz) and Owen Kaser (https://github.com/owenkaser) This code is licensed under Apache License, Version 2.0 (ASL2.0). (GPL 2.0 derivatives are allowed.) This is a word-aligned compressed variant of the Java Bitset class. We provide both a 64-bit and a 32-bit RLE-like compression scheme. It can be used to implement bitmap indexes. The goal of word-aligned compression is not to achieve the best compression, but rather to improve query processing time. Hence, we try to save CPU cycles, maybe at the expense of storage. However, the EWAH scheme we implemented is always more efficient storage-wise than an uncompressed bitmap (as implemented in the java BitSet class by Sun). For better performance, use a 64-bit JVM over 64-bit CPUs when using the 64-bit scheme (javaewah.EWAHCompressedBitmap). The 32-bit version (javaewah32.EWAHCompressedBitmap32) should compress better but be comparatively slower. Java 6 or better is required. 
For more details regarding the compression format, please see Section 3 of the following paper: Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. http://arxiv.org/abs/0901.3751 (The PDF file is freely available on the arXiv site.) Benchmark --------- For a simple comparison between this library and other libraries such as WAH, ConciseSet, BitSet and other options, please see https://github.com/lemire/simplebitmapbenchmark Unit testing ------------ As of October 2011, this package relies on Maven. To test it: mvn test See http://maven.apache.org/guides/introduction/introduction-to-the-lifecycle.html for details. Usage ----- See example.java. Maven central repository ------------------------ You can download JavaEWAH from the Maven central repository: http://repo1.maven.org/maven2/com/googlecode/javaewah/JavaEWAH/ You can also specify the dependency in the Maven "pom.xml" file: <dependency> <groupId>com.googlecode.javaewah</groupId> <artifactId>JavaEWAH</artifactId> <version>0.7.9</version> </dependency> Naturally, you should replace "version" by the version you desire. Travis (Continuous integration) ------------------------------- You can check whether the latest version builds on your favorite version of Java using Travis: https://travis-ci.org/lemire/javaewah/builds/11059867 Clojure ------- Joel Boehland wrote Clojure wrappers: https://github.com/jolby/clojure-ewah-bitmap Frequent questions ------------------ Question: How do I check the value of a bit? Answer: If you need to routinely check the value of a given bit quickly, then EWAH might not be the right format. 
However, if you must do it, you can proceed as follows: /** * Suppose you have the following bitmap: */ EWAHCompressedBitmap b = EWAHCompressedBitmap.bitmapOf(0,2,64,1<<30); /** * We want to know if bit 64 is set: */ boolean is64set = (b.and(EWAHCompressedBitmap.bitmapOf(64)).cardinality() == 1); javaewah-JavaEWAH-0.7.9/TODO000066400000000000000000000000111224043567000153240ustar00rootroot00000000000000TODO javaewah-JavaEWAH-0.7.9/example.java000066400000000000000000000043051224043567000171440ustar00rootroot00000000000000import com.googlecode.javaewah.EWAHCompressedBitmap;
import java.io.*;

/**
 * Walks through the basic EWAHCompressedBitmap operations: building bitmaps,
 * pairwise OR / AND / XOR, AND over several bitmaps at once, and a
 * serialization round trip through an in-memory byte array.
 *
 * @author lemire
 */
public class example {
  /**
   * Runs the demonstration and prints every intermediate bitmap.
   *
   * @param args arguments from the command line
   * @throws IOException if an IO error occurs
   */
  public static void main(final String[] args) throws java.io.IOException {
    EWAHCompressedBitmap ewahBitmap1 = EWAHCompressedBitmap.bitmapOf(0,2,64,1<<30);
    EWAHCompressedBitmap ewahBitmap2 = EWAHCompressedBitmap.bitmapOf(1,3,64,1<<30);
    System.out.println("bitmap 1: "+ewahBitmap1);
    System.out.println("bitmap 2: "+ewahBitmap2);
    // or
    EWAHCompressedBitmap orbitmap = ewahBitmap1.or(ewahBitmap2);
    System.out.println("bitmap 1 OR bitmap 2: "+orbitmap);
    System.out.println("memory usage: " + orbitmap.sizeInBytes() + " bytes");
    // and
    EWAHCompressedBitmap andbitmap = ewahBitmap1.and(ewahBitmap2);
    System.out.println("bitmap 1 AND bitmap 2: "+andbitmap);
    System.out.println("memory usage: " + andbitmap.sizeInBytes() + " bytes");
    // xor
    EWAHCompressedBitmap xorbitmap = ewahBitmap1.xor(ewahBitmap2);
    System.out.println("bitmap 1 XOR bitmap 2:"+xorbitmap);
    System.out.println("memory usage: " + xorbitmap.sizeInBytes() + " bytes");
    // fast aggregation over many bitmaps
    EWAHCompressedBitmap ewahBitmap3 = EWAHCompressedBitmap.bitmapOf(55,5,1<<30);
    EWAHCompressedBitmap ewahBitmap4 = EWAHCompressedBitmap.bitmapOf(4,66,1<<30);
    System.out.println("bitmap 3: "+ewahBitmap3);
    System.out.println("bitmap 4: "+ewahBitmap4);
    andbitmap = EWAHCompressedBitmap.and(ewahBitmap1,ewahBitmap2,
        ewahBitmap3,ewahBitmap4);
    System.out.println("b1 AND b2 AND b3 AND b4: "+andbitmap);
    // serialization
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    // Note: you could use a file output stream instead of ByteArrayOutputStream
    ObjectOutputStream oo = new ObjectOutputStream(bos);
    try {
      ewahBitmap1.writeExternal(oo);
    } finally {
      // closing also flushes the buffered object stream into bos
      oo.close();
    }
    // deserialize into a brand new bitmap to show the round trip
    ewahBitmap1 = new EWAHCompressedBitmap();
    ObjectInputStream oi = new ObjectInputStream(
        new ByteArrayInputStream(bos.toByteArray()));
    try {
      ewahBitmap1.readExternal(oi);
    } finally {
      oi.close();
    }
    System.out.println("bitmap 1 (recovered) : "+ewahBitmap1);
  }
}
javaewah-JavaEWAH-0.7.9/pom.xml000066400000000000000000000103701224043567000161620ustar00rootroot00000000000000 4.0.0 com.googlecode.javaewah JavaEWAH 0.7.9 bundle 1.6 1.6 UTF-8 Apache 2 http://www.apache.org/licenses/LICENSE-2.0.txt repo A business-friendly OSS license scm:git:git@github.com:lemire/javaewah.git scm:git:git@github.com:lemire/javaewah.git scm:git:git@github.com:lemire/javaewah.git lemire Daniel Lemire lemire@gmail.com http://lemire.me/en/ LICEF Research Center http://licef.ca architect developer maintainer -5 http://lemire.me/fr/images/JPG/profile2011B_152.jpg junit junit 4.10 test Google Code Issue Tracking http://code.google.com/p/javaewah/issues/list org.sonatype.oss oss-parent 5 org.apache.felix maven-bundle-plugin 2.3.7 true com.googlecode.javaewah.* * org.apache.maven.plugins maven-gpg-plugin 1.4 sign-artifacts verify sign org.apache.maven.plugins maven-javadoc-plugin 2.8 attach-javadocs jar org.apache.maven.plugins maven-source-plugin 2.1.2 attach-sources jar JavaEWAH http://code.google.com/p/javaewah/ The bit array data structure is implemented in Java as the BitSet class. Unfortunately, this fails to scale without compression. JavaEWAH is a word-aligned compressed variant of the Java bitset class. It uses a 64-bit run-length encoding (RLE) compression scheme.
The goal of word-aligned compression is not to achieve the best compression, but rather to improve query processing time. Hence, we try to save CPU cycles, maybe at the expense of storage. However, the EWAH scheme we implemented is always more efficient storage-wise than an uncompressed bitmap (implemented in Java as the BitSet class). Unlike some alternatives, javaewah does not rely on a patented scheme. javaewah-JavaEWAH-0.7.9/src/000077500000000000000000000000001224043567000154335ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/main/000077500000000000000000000000001224043567000163575ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/main/java/000077500000000000000000000000001224043567000173005ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/main/java/com/000077500000000000000000000000001224043567000200565ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/000077500000000000000000000000001224043567000221655ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/000077500000000000000000000000001224043567000237535ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/BitCounter.java000066400000000000000000000045311224043567000266770ustar00rootroot00000000000000package com.googlecode.javaewah; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * BitCounter is a fake bitset data structure. Instead of storing the actual * data, it only records the number of set bits. * * @since 0.4.0 * @author David McIntosh */ public final class BitCounter implements BitmapStorage { /** * Virtually add words directly to the bitmap * * @param newdata * the word */ @Override public void add(final long newdata) { this.oneBits += Long.bitCount(newdata); return; } /** * virtually add several literal words. 
* * @param data * the literal words * @param start * the starting point in the array * @param number * the number of literal words to add */ @Override public void addStreamOfLiteralWords(long[] data, int start, int number) { for (int i = start; i < start + number; i++) { add(data[i]); } return; } /** * virtually add many zeroes or ones. * * @param v * zeros or ones * @param number * how many to words add */ @Override public void addStreamOfEmptyWords(boolean v, long number) { if (v) { this.oneBits += number * EWAHCompressedBitmap.wordinbits; } return; } /** * virtually add several negated literal words. * * @param data * the literal words * @param start * the starting point in the array * @param number * the number of literal words to add */ // @Override : causes problems with Java 1.5 @Override public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { for (int i = start; i < start + number; i++) { add(~data[i]); } return; } /** * As you act on this class, it records the number of set (true) bits. * * @return number of set bits */ public int getCount() { return this.oneBits; } /** * should directly set the sizeinbits field, but is effectively ignored in * this class. * * @param bits * number of bits */ // @Override : causes problems with Java 1.5 @Override public void setSizeInBits(int bits) { // no action } private int oneBits; } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/BitmapStorage.java000066400000000000000000000033551224043567000273650ustar00rootroot00000000000000package com.googlecode.javaewah; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Low level bitset writing methods. * * @since 0.4.0 * @author David McIntosh */ public interface BitmapStorage { /** * Adding words directly to the bitmap (for expert use). * * This is normally how you add data to the array. 
So you add bits in streams
   * of 8*8 bits.
   *
   * @param newdata
   *          the word
   */
  public void add(final long newdata);

  /**
   * Adds a contiguous slice of literal words in one call; this might be
   * faster than adding them one at a time.
   *
   * @param data
   *          the literal words
   * @param start
   *          the starting point in the array
   * @param number
   *          the number of literal words to add
   */
  public void addStreamOfLiteralWords(final long[] data, final int start,
      final int number);

  /**
   * For experts: adds a run of identical words, either all zeros or all
   * ones.
   *
   * @param v
   *          true to add words made of ones, false to add words made of zeros
   * @param number
   *          how many words to add
   */
  public void addStreamOfEmptyWords(final boolean v, final long number);

  /**
   * Like "addStreamOfLiteralWords" but negates the words being added.
   *
   * @param data
   *          the literal words
   * @param start
   *          the starting point in the array
   * @param number
   *          the number of literal words to add
   */
  public void addStreamOfNegatedLiteralWords(long[] data, final int start,
      final int number);

  /**
   * Directly sets the sizeinbits field.
   *
   * @param bits
   *          number of bits
   */
  public void setSizeInBits(final int bits);
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/BufferedIterator.java000066400000000000000000000076731224043567000300670ustar00rootroot00000000000000package com.googlecode.javaewah;

/*
 * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
 * Licensed under the Apache License, Version 2.0.
 */

/**
 * This class can be used to iterate over blocks of bitmap data.
 *
 * @author Daniel Lemire
 *
 */
public class BufferedIterator implements IteratingRLW {
  /**
   * Instantiates a new iterating buffered running length word.
* * @param iterator iterator */ public BufferedIterator(final CloneableIterator iterator) { this.masteriterator = iterator; if(this.masteriterator.hasNext()) { this.iterator = this.masteriterator.next(); this.brlw = new BufferedRunningLengthWord(this.iterator.next()); this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; this.buffer = this.iterator.buffer(); } } /** * Discard first words, iterating to the next running length word if needed. * * @param x the number of words to be discarded */ @Override public void discardFirstWords(long x) { while (x > 0) { if (this.brlw.RunningLength > x) { this.brlw.RunningLength -= x; return; } x -= this.brlw.RunningLength; this.brlw.RunningLength = 0; long toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; this.literalWordStartPosition += toDiscard; this.brlw.NumberOfLiteralWords -= toDiscard; x -= toDiscard; if ((x > 0) || (this.brlw.size() == 0)) { if (!this.next()) { break; } } } } /** * Move to the next RunningLengthWord * @return whether the move was possible */ @Override public boolean next() { if (!this.iterator.hasNext()) { if(!reload()) { this.brlw.NumberOfLiteralWords = 0; this.brlw.RunningLength = 0; return false; } } this.brlw.reset(this.iterator.next()); this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 return true; } private boolean reload() { if(!this.masteriterator.hasNext()) { return false; } this.iterator = this.masteriterator.next(); this.buffer = this.iterator.buffer(); return true; } /** * Get the nth literal word for the current running length word * @param index zero based index * @return the literal word */ @Override public long getLiteralWordAt(int index) { return this.buffer[this.literalWordStartPosition + index]; } /** * Gets the number of literal words for the current running length word. 
* * @return the number of literal words */ @Override public int getNumberOfLiteralWords() { return this.brlw.NumberOfLiteralWords; } /** * Gets the running bit. * * @return the running bit */ @Override public boolean getRunningBit() { return this.brlw.RunningBit; } /** * Gets the running length. * * @return the running length */ @Override public long getRunningLength() { return this.brlw.RunningLength; } /** * Size in uncompressed words of the current running length word. * * @return the size */ @Override public long size() { return this.brlw.size(); } @Override public BufferedIterator clone() throws CloneNotSupportedException { BufferedIterator answer = (BufferedIterator) super.clone(); answer.brlw = this.brlw.clone(); answer.buffer = this.buffer; answer.iterator = this.iterator.clone(); answer.literalWordStartPosition = this.literalWordStartPosition; answer.masteriterator = this.masteriterator.clone(); return answer; } private BufferedRunningLengthWord brlw; private long[] buffer; private int literalWordStartPosition; private EWAHIterator iterator; private CloneableIterator masteriterator; }javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/BufferedRunningLengthWord.java000066400000000000000000000100601224043567000316740ustar00rootroot00000000000000package com.googlecode.javaewah; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. Similar to RunningLengthWord, but can * be modified without access to the array, and has faster access. * * @author Daniel Lemire * @since 0.1.0 * */ public final class BufferedRunningLengthWord implements Cloneable { /** * Instantiates a new buffered running length word. 
* * @param a the word */ public BufferedRunningLengthWord(final long a) { this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits)); this.RunningBit = (a & 1) != 0; this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount); } /** * Instantiates a new buffered running length word. * * @param rlw the rlw */ public BufferedRunningLengthWord(final RunningLengthWord rlw) { this(rlw.parent.buffer[rlw.position]); } /** * Discard first words. * * @param x the x */ public void discardFirstWords(long x) { if (this.RunningLength >= x) { this.RunningLength -= x; return; } x -= this.RunningLength; this.RunningLength = 0; this.literalwordoffset += x; this.NumberOfLiteralWords -= x; } /** * Gets the number of literal words. * * @return the number of literal words */ public int getNumberOfLiteralWords() { return this.NumberOfLiteralWords; } /** * Gets the running bit. * * @return the running bit */ public boolean getRunningBit() { return this.RunningBit; } /** * Gets the running length. * * @return the running length */ public long getRunningLength() { return this.RunningLength; } /** * Reset the values using the provided word. * * @param a the word */ public void reset(final long a) { this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits)); this.RunningBit = (a & 1) != 0; this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount); this.literalwordoffset = 0; } /** * Reset the values of this running length word so that it has the same values * as the other running length word. * * @param rlw the other running length word */ public void reset(final RunningLengthWord rlw) { reset(rlw.parent.buffer[rlw.position]); } /** * Sets the number of literal words. * * @param number the new number of literal words */ public void setNumberOfLiteralWords(final int number) { this.NumberOfLiteralWords = number; } /** * Sets the running bit. 
* * @param b the new running bit */ public void setRunningBit(final boolean b) { this.RunningBit = b; } /** * Sets the running length. * * @param number the new running length */ public void setRunningLength(final long number) { this.RunningLength = number; } /** * Size in uncompressed words. * * @return the long */ public long size() { return this.RunningLength + this.NumberOfLiteralWords; } /* * @see java.lang.Object#toString() */ @Override public String toString() { return "running bit = " + getRunningBit() + " running length = " + getRunningLength() + " number of lit. words " + getNumberOfLiteralWords(); } @Override public BufferedRunningLengthWord clone() throws CloneNotSupportedException { BufferedRunningLengthWord answer = (BufferedRunningLengthWord) super.clone(); answer.literalwordoffset = this.literalwordoffset; answer.NumberOfLiteralWords = this.NumberOfLiteralWords; answer.RunningBit = this.RunningBit; answer.RunningLength = this.RunningLength; return answer; } /** how many literal words have we read so far? */ public int literalwordoffset = 0; /** The Number of literal words. */ public int NumberOfLiteralWords; /** The Running bit. */ public boolean RunningBit; /** The Running length. */ public long RunningLength; }javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/CloneableIterator.java000066400000000000000000000010111224043567000302050ustar00rootroot00000000000000package com.googlecode.javaewah; /** * Like a standard Java iterator, except that you can clone it. 
* * @param the data type of the iterator */ public interface CloneableIterator extends Cloneable { /** * @return whether there is more */ public boolean hasNext(); /** * @return the next element */ public E next(); /** * @return a copy * @throws CloneNotSupportedException this should never happen in practice */ public CloneableIterator clone() throws CloneNotSupportedException; }javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java000066400000000000000000001527301224043567000305340ustar00rootroot00000000000000package com.googlecode.javaewah; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ import java.util.*; import java.io.*; /** *

* This implements the patent-free(1) EWAH scheme. Roughly speaking, it is a * 64-bit variant of the BBC compression scheme used by Oracle for its bitmap * indexes. *

* *

* The objective of this compression type is to provide some compression, while * reducing as much as possible the CPU cycle usage. *

* * *

* This implementation being 64-bit, it assumes a 64-bit CPU together with a * 64-bit Java Virtual Machine. This same code on a 32-bit machine may not be as * fast. *

* *

* There is also a 32-bit version of this code in the class * javaewah32.EWAHCompressedBitmap32 *

* * @see com.googlecode.javaewah32.EWAHCompressedBitmap32 * *

* For more details, see the following paper: *

* *
    *
  • Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves * word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages * 3-28, 2010. http://arxiv.org/abs/0901.3751
  • *
* *

* A 32-bit version of the compressed format was described by Wu et al. and * named WBC: *

* *
    *
  • K. Wu, E. J. Otoo, A. Shoshani, H. Nordberg, Notes on design and * implementation of compressed bit vectors, Tech. Rep. LBNL/PUB-3161, * Lawrence Berkeley National Laboratory, available from * http://crd.lbl.gov/~kewu/ps/PUB-3161.html (2001).
  • *
* *

* Probably, the best prior art is the Oracle bitmap compression scheme * (BBC): *

*
    *
  • G. Antoshenkov, Byte-Aligned Bitmap Compression, DCC'95, 1995.
  • *
* *

* 1- The authors do not know of any patent infringed by the following * implementation. However, similar schemes, like WAH are covered by * patents. *

*
 * @since 0.1.0
 */
public final class EWAHCompressedBitmap implements Cloneable, Externalizable,
    Iterable, BitmapStorage, LogicalElement {

  /**
   * Builds a bitmap with no bit set, backed by a buffer of the default
   * size.
   */
  public EWAHCompressedBitmap() {
    this.buffer = new long[defaultbuffersize];
    this.rlw = new RunningLengthWord(this, 0);
  }

  /**
   * Builds a bitmap with no bit set and an explicit initial buffer size
   * (in 64-bit words), i.e. "buffersize * 64" bits are reserved up front.
   * For large poorly compressible bitmaps, using large values may improve
   * performance.
   *
   * @param buffersize
   *          number of 64-bit words reserved when the object is created
   */
  public EWAHCompressedBitmap(final int buffersize) {
    this.buffer = new long[buffersize];
    this.rlw = new RunningLengthWord(this, 0);
  }

  /**
   * Appends one full word to the bitmap (for expert use).
   *
   * This is the normal way of feeding data: bits arrive in chunks of 8*8
   * bits.
   *
   * Example: adding 321, which is 0b101000001 in binary notation, has the
   * same effect as calling set(0), set(6), set(8) in sequence.
   *
   * @param newdata
   *          the word
   */
  @Override
  public void add(final long newdata) {
    add(newdata, wordinbits);
  }

  /**
   * Appends one word of which only some bits count towards the size of the
   * bitmap (for expert use).
   *
   * @param newdata
   *          the word
   * @param bitsthatmatter
   *          the number of significant bits (by default it should be 64)
   */
  public void add(final long newdata, final int bitsthatmatter) {
    this.sizeinbits += bitsthatmatter;
    final boolean allZeros = (newdata == 0L);
    final boolean allOnes = (newdata == -1L);
    if (allZeros || allOnes) {
      // uniform words extend (or start) a run instead of being stored
      addEmptyWord(allOnes);
      return;
    }
    addLiteralWord(newdata);
  }

  /**
   * For internal use.
* * @param v * the boolean value */ private void addEmptyWord(final boolean v) { final boolean noliteralword = (this.rlw.getNumberOfLiteralWords() == 0); final long runlen = this.rlw.getRunningLength(); if ((noliteralword) && (runlen == 0)) { this.rlw.setRunningBit(v); } if ((noliteralword) && (this.rlw.getRunningBit() == v) && (runlen < RunningLengthWord.largestrunninglengthcount)) { this.rlw.setRunningLength(runlen + 1); return; } push_back(0); this.rlw.position = this.actualsizeinwords - 1; this.rlw.setRunningBit(v); this.rlw.setRunningLength(1); return; } /** * For internal use. * * @param newdata * the literal word */ private void addLiteralWord(final long newdata) { final int numbersofar = this.rlw.getNumberOfLiteralWords(); if (numbersofar >= RunningLengthWord.largestliteralcount) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; this.rlw.setNumberOfLiteralWords(1); push_back(newdata); } this.rlw.setNumberOfLiteralWords(numbersofar + 1); push_back(newdata); } /** * if you have several literal words to copy over, this might be faster. * * * @param data * the literal words * @param start * the starting point in the array * @param number * the number of literal words to add */ @Override public void addStreamOfLiteralWords(final long[] data, final int start, final int number) { int leftovernumber = number; while(leftovernumber > 0) { final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); final int whatwecanadd = leftovernumber < RunningLengthWord.largestliteralcount - NumberOfLiteralWords ? leftovernumber : RunningLengthWord.largestliteralcount - NumberOfLiteralWords; this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + whatwecanadd); leftovernumber -= whatwecanadd; push_back(data, start, whatwecanadd); this.sizeinbits += whatwecanadd * wordinbits; if (leftovernumber > 0) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; } } } /** * For experts: You want to add many zeroes or ones? This is the method you * use. 
* * @param v * the boolean value * @param number * the number */ @Override public void addStreamOfEmptyWords(final boolean v, long number) { if (number == 0) return; this.sizeinbits += number * wordinbits; if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { this.rlw.setRunningBit(v); } else if ((this.rlw.getNumberOfLiteralWords() != 0) || (this.rlw.getRunningBit() != v)) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); } final long runlen = this.rlw.getRunningLength(); final long whatwecanadd = number < RunningLengthWord.largestrunninglengthcount - runlen ? number : RunningLengthWord.largestrunninglengthcount - runlen; this.rlw.setRunningLength(runlen + whatwecanadd); number -= whatwecanadd; while (number >= RunningLengthWord.largestrunninglengthcount) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); this.rlw.setRunningLength(RunningLengthWord.largestrunninglengthcount); number -= RunningLengthWord.largestrunninglengthcount; } if (number > 0) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); this.rlw.setRunningLength(number); } } /** * Same as addStreamOfLiteralWords, but the words are negated. * * @param data * the literal words * @param start * the starting point in the array * @param number * the number of literal words to add */ @Override public void addStreamOfNegatedLiteralWords(final long[] data, final int start, final int number) { int leftovernumber = number; while (leftovernumber > 0) { final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); final int whatwecanadd = leftovernumber < RunningLengthWord.largestliteralcount - NumberOfLiteralWords ? 
leftovernumber : RunningLengthWord.largestliteralcount - NumberOfLiteralWords; this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + whatwecanadd); leftovernumber -= whatwecanadd; negative_push_back(data, start, whatwecanadd); this.sizeinbits += whatwecanadd * wordinbits; if (leftovernumber > 0) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; } } } /** * Returns a new compressed bitmap containing the bitwise AND values of the * current bitmap with some other bitmap. * * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * * If you are not planning on adding to the resulting bitmap, you may call the trim() * method to reduce memory usage. * * @since 0.4.3 * @param a * the other bitmap * @return the EWAH compressed bitmap */ @Override public EWAHCompressedBitmap and(final EWAHCompressedBitmap a) { final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); container .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords : a.actualsizeinwords); andToContainer(a, container); return container; } /** * Computes new compressed bitmap containing the bitwise AND values of the * current bitmap with some other bitmap. * * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). 
*
   * @since 0.4.0
   * @param a
   *          the other bitmap
   * @param container
   *          where we store the result
   */
  public void andToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) {
    // i walks the other bitmap, j walks this one
    final EWAHIterator i = a.getEWAHIterator();
    final EWAHIterator j = getEWAHIterator();
    final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i);
    final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j);
    // main loop: runs as long as both inputs still have words
    while ((rlwi.size()>0) && (rlwj.size()>0)) {
      // first use up the pending runs
      while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
        // the "predator" is the side whose run is at least as long as the
        // other one's; the "prey" is the other side
        final boolean i_is_prey = rlwi.getRunningLength() < rlwj
            .getRunningLength();
        final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj;
        final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj
            : rlwi;
        if (predator.getRunningBit() == false) {
          // run of zeros: the AND is zero over the whole run, whatever the
          // prey holds, so both sides skip that many words
          container.addStreamOfEmptyWords(false, predator.getRunningLength());
          prey.discardFirstWords(predator.getRunningLength());
          predator.discardFirstWords(predator.getRunningLength());
        } else {
          // run of ones: the AND equals the prey's content.
          // NOTE(review): discharge is defined elsewhere; it presumably
          // writes up to that many prey words to the container and returns
          // how many it wrote - confirm. The rest is filled with zeros.
          final long index = prey.discharge(container, predator.getRunningLength());
          container.addStreamOfEmptyWords(false, predator.getRunningLength()
              - index);
          predator.discardFirstWords(predator.getRunningLength());
        }
      }
      // both sides are now on literal words: AND them pairwise, for as many
      // words as both sides have
      final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
          rlwj.getNumberOfLiteralWords());
      if (nbre_literal > 0) {
        for (int k = 0; k < nbre_literal; ++k)
          container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
        rlwi.discardFirstWords(nbre_literal);
        rlwj.discardFirstWords(nbre_literal);
      }
    }
    // At most one side has words left. When size adjustment is enabled, the
    // leftover is handed to dischargeAsEmpty and the container takes the
    // size of the larger input.
    if(adjustContainerSizeWhenAggregating) {
      final boolean i_remains = rlwi.size()>0;
      final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi
          : rlwj;
      remaining.dischargeAsEmpty(container);
      container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits()));
    }
  }

  /**
   * Returns the cardinality of the result of a bitwise AND of the values of the
   * current bitmap with some other bitmap.
Avoids needing to allocate an * intermediate bitmap to hold the result of the OR. * * @since 0.4.0 * @param a * the other bitmap * @return the cardinality */ public int andCardinality(final EWAHCompressedBitmap a) { final BitCounter counter = new BitCounter(); andToContainer(a, counter); return counter.getCount(); } /** * Returns a new compressed bitmap containing the bitwise AND NOT values of * the current bitmap with some other bitmap. * * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * * If you are not planning on adding to the resulting bitmap, you may call the trim() * method to reduce memory usage. * * @param a * the other bitmap * @return the EWAH compressed bitmap */ @Override public EWAHCompressedBitmap andNot(final EWAHCompressedBitmap a) { final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); container .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords : a.actualsizeinwords); andNotToContainer(a, container); return container; } /** * Returns a new compressed bitmap containing the bitwise AND NOT values of * the current bitmap with some other bitmap. This method is expected to * be faster than doing A.and(B.clone().not()). * * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). 
* * @since 0.4.0 * @param a the other bitmap * @param container where to store the result */ public void andNotToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) { final EWAHIterator i = getEWAHIterator(); final EWAHIterator j = a.getEWAHIterator(); final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); while ((rlwi.size()>0) && (rlwj.size()>0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { final boolean i_is_prey = rlwi.getRunningLength() < rlwj .getRunningLength(); final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj; final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj : rlwi; if ( ((predator.getRunningBit() == true) && (i_is_prey)) || ((predator.getRunningBit() == false) && (!i_is_prey))){ container.addStreamOfEmptyWords(false, predator.getRunningLength()); prey.discardFirstWords(predator.getRunningLength()); predator.discardFirstWords(predator.getRunningLength()); } else if (i_is_prey) { long index = prey.discharge(container, predator.getRunningLength()); container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } else { long index = prey.dischargeNegated(container, predator.getRunningLength()); container.addStreamOfEmptyWords(true, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } } final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (int k = 0; k < nbre_literal; ++k) container.add(rlwi.getLiteralWordAt(k) & (~rlwj.getLiteralWordAt(k))); rlwi.discardFirstWords(nbre_literal); rlwj.discardFirstWords(nbre_literal); } } final boolean i_remains = rlwi.size()>0; final IteratingBufferedRunningLengthWord remaining = i_remains ? 
rlwi : rlwj; if(i_remains) remaining.discharge(container); else if(adjustContainerSizeWhenAggregating) remaining.dischargeAsEmpty(container); if(adjustContainerSizeWhenAggregating) container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); } /** * Returns the cardinality of the result of a bitwise AND NOT of the values of * the current bitmap with some other bitmap. Avoids needing to allocate an * intermediate bitmap to hold the result of the OR. * * @since 0.4.0 * @param a * the other bitmap * @return the cardinality */ public int andNotCardinality(final EWAHCompressedBitmap a) { final BitCounter counter = new BitCounter(); andNotToContainer(a, counter); return counter.getCount(); } /** * reports the number of bits set to true. Running time is proportional to * compressed size (as reported by sizeInBytes). * * @return the number of bits set to true */ public int cardinality() { int counter = 0; final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord localrlw = i.next(); if (localrlw.getRunningBit()) { counter += wordinbits * localrlw.getRunningLength(); } for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { counter += Long.bitCount(i.buffer()[i.literalWords() + j]); } } return counter; } /** * Clear any set bits and set size in bits back to 0 */ public void clear() { this.sizeinbits = 0; this.actualsizeinwords = 1; this.rlw.position = 0; // buffer is not fully cleared but any new set operations should overwrite // stale data this.buffer[0] = 0; } /* * @see java.lang.Object#clone() */ @Override public EWAHCompressedBitmap clone() throws java.lang.CloneNotSupportedException { final EWAHCompressedBitmap clone = (EWAHCompressedBitmap) super.clone(); clone.buffer = this.buffer.clone(); clone.rlw = new RunningLengthWord(clone, this.rlw.position); clone.actualsizeinwords = this.actualsizeinwords; clone.sizeinbits = this.sizeinbits; return clone; } /** * Deserialize. 
* * @param in * the DataInput stream * @throws IOException * Signals that an I/O exception has occurred. */ public void deserialize(DataInput in) throws IOException { this.sizeinbits = in.readInt(); this.actualsizeinwords = in.readInt(); if (this.buffer.length < this.actualsizeinwords) { this.buffer = new long[this.actualsizeinwords]; } for (int k = 0; k < this.actualsizeinwords; ++k) this.buffer[k] = in.readLong(); this.rlw = new RunningLengthWord(this, in.readInt()); } /** * Check to see whether the two compressed bitmaps contain the same set bits. * * @see java.lang.Object#equals(java.lang.Object) */ @Override public boolean equals(Object o) { if (o instanceof EWAHCompressedBitmap) { try { this.xorToContainer((EWAHCompressedBitmap) o, new NonEmptyVirtualStorage()); return true; } catch (NonEmptyVirtualStorage.NonEmptyException e) { return false; } } return false; } /** * For experts: You want to add many zeroes or ones faster? * * This method does not update sizeinbits. * * @param v * the boolean value * @param number * the number (must be greater than 0) */ private void fastaddStreamOfEmptyWords(final boolean v, long number) { if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { this.rlw.setRunningBit(v); } else if ((this.rlw.getNumberOfLiteralWords() != 0) || (this.rlw.getRunningBit() != v)) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); } final long runlen = this.rlw.getRunningLength(); final long whatwecanadd = number < RunningLengthWord.largestrunninglengthcount - runlen ? 
number : RunningLengthWord.largestrunninglengthcount - runlen; this.rlw.setRunningLength(runlen + whatwecanadd); number -= whatwecanadd; while (number >= RunningLengthWord.largestrunninglengthcount) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); this.rlw.setRunningLength(RunningLengthWord.largestrunninglengthcount); number -= RunningLengthWord.largestrunninglengthcount; } if (number > 0) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); this.rlw.setRunningLength(number); } } /** * Gets an EWAHIterator over the data. This is a customized iterator which * iterates over run length word. For experts only. * * @return the EWAHIterator */ public EWAHIterator getEWAHIterator() { return new EWAHIterator(this, this.actualsizeinwords); } /** * @return the IteratingRLW iterator corresponding to this bitmap */ public IteratingRLW getIteratingRLW() { return new IteratingBufferedRunningLengthWord(this); } /** * get the locations of the true values as one vector. (may use more memory * than iterator()) * * @return the positions */ public List getPositions() { final ArrayList v = new ArrayList(); final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); int pos = 0; while (i.hasNext()) { RunningLengthWord localrlw = i.next(); if (localrlw.getRunningBit()) { for (int j = 0; j < localrlw.getRunningLength(); ++j) { for (int c = 0; c < wordinbits; ++c) v.add(new Integer(pos++)); } } else { pos += wordinbits * localrlw.getRunningLength(); } for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { long data = i.buffer()[i.literalWords() + j]; while (data != 0) { final int ntz = Long.numberOfTrailingZeros(data); data ^= (1l << ntz); v.add(new Integer(ntz + pos)); } pos += wordinbits; } } while ((v.size() > 0) && (v.get(v.size() - 1).intValue() >= this.sizeinbits)) v.remove(v.size() - 1); return v; } /** * Returns a customized hash code (based on Karp-Rabin). 
Naturally, if the
   * bitmaps are equal, they will hash to the same value.
   *
   * Only runs of ones and literal words are folded into the hash; each 64-bit
   * quantity is fed in as its low and then its high 32-bit half.
   */
  @Override
  public int hashCode() {
    final int B = 31;
    int hash = 0;
    final EWAHIterator words = new EWAHIterator(this, this.actualsizeinwords);
    while (words.hasNext()) {
      words.next();
      if (words.rlw.getRunningBit()) {
        hash += B * hash + (words.rlw.getRunningLength() & ((1l << 32) - 1));
        hash += B * hash + (words.rlw.getRunningLength() >>> 32);
      }
      final int literalCount = words.rlw.getNumberOfLiteralWords();
      for (int k = 0; k < literalCount; ++k) {
        hash += B * hash
            + (this.buffer[words.literalWords() + k] & ((1l << 32) - 1));
        hash += B * hash + (this.buffer[words.literalWords() + k] >>> 32);
      }
    }
    return hash;
  }

  /**
   * Tells whether this bitmap and the other one share at least one set bit.
   * This is equivalent to computing "and" and checking for a set bit, but it
   * does not build the result: the AND is streamed into a
   * NonEmptyVirtualStorage, and a NonEmptyException from it is taken to mean
   * that an intersection exists.
   *
   * @since 0.3.2
   * @param a
   *          the other bitmap
   * @return whether they intersect
   */
  public boolean intersects(final EWAHCompressedBitmap a) {
    try {
      this.andToContainer(a, new NonEmptyVirtualStorage());
      return false;
    } catch (NonEmptyVirtualStorage.NonEmptyException nee) {
      return true;
    }
  }

  /**
   * Iterator over the positions of the set bits, in increasing order. This is
   * the iterator most users will want for browsing the content.
   *
   * @return the int iterator
   */
  public IntIterator intIterator() {
    final EWAHIterator words = new EWAHIterator(this, this.actualsizeinwords);
    return new IntIteratorImpl(words);
  }

  /**
   * iterate over the positions of the true values. This is similar to
   * intIterator(), but it uses Java generics.
* * @return the iterator */ @Override public Iterator iterator() { return new Iterator() { @Override public boolean hasNext() { return this.under.hasNext(); } @Override public Integer next() { return new Integer(this.under.next()); } @Override public void remove() { throw new UnsupportedOperationException("bitsets do not support remove"); } final private IntIterator under = intIterator(); }; } /** * For internal use. * * @param data * the array of words to be added * @param start * the starting point * @param number * the number of words to add */ private void negative_push_back(final long[] data, final int start, final int number) { while (this.actualsizeinwords + number >= this.buffer.length) { final long oldbuffer[] = this.buffer; if((this.actualsizeinwords + number) < 32768) this.buffer = new long[ (this.actualsizeinwords + number) * 2]; else if((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) // overflow this.buffer = new long[Integer.MAX_VALUE]; else this.buffer = new long[(this.actualsizeinwords + number) * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); this.rlw.parent.buffer = this.buffer; } for (int k = 0; k < number; ++k) this.buffer[this.actualsizeinwords + k] = ~data[start + k]; this.actualsizeinwords += number; } /** * Negate (bitwise) the current bitmap. To get a negated copy, do * EWAHCompressedBitmap x= ((EWAHCompressedBitmap) mybitmap.clone()); x.not(); * * The running time is proportional to the compressed size (as reported by * sizeInBytes()). 
*
   * The bitmap is modified in place: every running bit is flipped and every
   * literal word is complemented. Bits of the last word that lie beyond
   * sizeinbits are then forced back to zero.
   */
  @Override
  public void not() {
    final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords);
    if (!i.hasNext())
      return;
    while (true) {
      final RunningLengthWord rlw1 = i.next();
      rlw1.setRunningBit(!rlw1.getRunningBit());
      for (int j = 0; j < rlw1.getNumberOfLiteralWords(); ++j) {
        i.buffer()[i.literalWords() + j] = ~i.buffer()[i.literalWords() + j];
      }
      if (!i.hasNext()) {// must potentially adjust the last literal word
        final int usedbitsinlast = this.sizeinbits % wordinbits;
        // sizeinbits is a multiple of the word size: no padding to clean up.
        if (usedbitsinlast == 0)
          return;
        if (rlw1.getNumberOfLiteralWords() == 0) {
          // The bitmap ends in a run. If it is now a run of ones, take one
          // word off the run and re-add it as a literal holding only the
          // usedbitsinlast low bits.
          if((rlw1.getRunningLength()>0) && (rlw1.getRunningBit())) {
            rlw1.setRunningLength(rlw1.getRunningLength()-1);
            this.addLiteralWord((~0l) >>> (wordinbits - usedbitsinlast));
          }
          return;
        }
        // The bitmap ends in a literal word: mask off the unused high bits.
        i.buffer()[i.literalWords() + rlw1.getNumberOfLiteralWords() - 1] &= ((~0l) >>> (wordinbits - usedbitsinlast));
        return;
      }
    }
  }

  /**
   * Returns a new compressed bitmap containing the bitwise OR values of the
   * current bitmap with some other bitmap.
   *
   * The running time is proportional to the sum of the compressed sizes (as
   * reported by sizeInBytes()).
   *
   * If you are not planning on adding to the resulting bitmap, you may call the trim()
   * method to reduce memory usage.
   *
   * @param a
   *          the other bitmap
   * @return the EWAH compressed bitmap
   */
  @Override
  public EWAHCompressedBitmap or(final EWAHCompressedBitmap a) {
    final EWAHCompressedBitmap container = new EWAHCompressedBitmap();
    // Reserve room for the combined compressed sizes of both operands.
    container.reserve(this.actualsizeinwords + a.actualsizeinwords);
    orToContainer(a, container);
    return container;
  }

  /**
   * Computes the bitwise or between the current bitmap and the bitmap "a".
   * Stores the result in the container.
*
   * Both bitmaps are walked in lock step. The size in bits of the container
   * is finally set to the larger of the two input sizes.
   *
   * @since 0.4.0
   * @param a
   *          the other bitmap
   * @param container
   *          where we store the result
   */
  public void orToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) {
    final EWAHIterator i = a.getEWAHIterator();
    final EWAHIterator j = getEWAHIterator();
    final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i);
    final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j);
    while ((rlwi.size()>0) && (rlwj.size()>0)) {
      while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
        // The side with the longer run is the "predator"; the other side
        // ("prey") is consumed over the length of that run.
        final boolean i_is_prey = rlwi.getRunningLength() < rlwj
            .getRunningLength();
        final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi
            : rlwj;
        final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj
            : rlwi;
        if (predator.getRunningBit() == true) {
          // OR with a run of ones is a run of ones: the prey's words are
          // discarded without being read.
          container.addStreamOfEmptyWords(true, predator.getRunningLength());
          prey.discardFirstWords(predator.getRunningLength());
          predator.discardFirstWords(predator.getRunningLength());
        } else {
          // OR with a run of zeros: the prey's words are discharged into
          // the container; whatever the discharge did not cover is padded
          // with zero words.
          long index = prey.discharge(container, predator.getRunningLength());
          container.addStreamOfEmptyWords(false, predator.getRunningLength()
              - index);
          predator.discardFirstWords(predator.getRunningLength());
        }
      }
      // Both sides now expose literal words: OR them pairwise.
      final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
          rlwj.getNumberOfLiteralWords());
      if (nbre_literal > 0) {
        for (int k = 0; k < nbre_literal; ++k) {
          container.add(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k));
        }
        rlwi.discardFirstWords(nbre_literal);
        rlwj.discardFirstWords(nbre_literal);
      }
    }
    // One side is exhausted; the remainder of the other is discharged as is.
    final boolean i_remains = rlwi.size()>0;
    final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi
        : rlwj;
    remaining.discharge(container);
    container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits()));
  }

  /**
   * Returns the cardinality of the result of a bitwise OR of the values of the
   * current bitmap with some other bitmap. Avoids needing to allocate an
   * intermediate bitmap to hold the result of the OR.
* * @since 0.4.0 * @param a * the other bitmap * @return the cardinality */ public int orCardinality(final EWAHCompressedBitmap a) { final BitCounter counter = new BitCounter(); orToContainer(a, counter); return counter.getCount(); } /** * For internal use. * * @param data * the word to be added */ private void push_back(final long data) { if (this.actualsizeinwords == this.buffer.length) { final long oldbuffer[] = this.buffer; if(oldbuffer.length < 32768) this.buffer = new long[ oldbuffer.length * 2]; else if(oldbuffer.length * 3 / 2 < oldbuffer.length) // overflow this.buffer = new long[Integer.MAX_VALUE]; else this.buffer = new long[oldbuffer.length * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); this.rlw.parent.buffer = this.buffer; } this.buffer[this.actualsizeinwords++] = data; } /** * For internal use. * * @param data * the array of words to be added * @param start * the starting point * @param number * the number of words to add */ private void push_back(final long[] data, final int start, final int number) { if (this.actualsizeinwords + number >= this.buffer.length) { final long oldbuffer[] = this.buffer; if(this.actualsizeinwords + number < 32768) this.buffer = new long[(this.actualsizeinwords + number) * 2]; else if ((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) // overflow this.buffer = new long[Integer.MAX_VALUE]; else this.buffer = new long[( this.actualsizeinwords + number) * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); this.rlw.parent.buffer = this.buffer; } System.arraycopy(data, start, this.buffer, this.actualsizeinwords, number); this.actualsizeinwords += number; } /* * @see java.io.Externalizable#readExternal(java.io.ObjectInput) */ @Override public void readExternal(ObjectInput in) throws IOException { deserialize(in); } /** * For internal use (trading off memory for speed). 
* * @param size * the number of words to allocate * @return True if the operation was a success. */ private boolean reserve(final int size) { if (size > this.buffer.length) { final long oldbuffer[] = this.buffer; this.buffer = new long[size]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); this.rlw.parent.buffer = this.buffer; return true; } return false; } /** * Serialize. * * @param out * the DataOutput stream * @throws IOException * Signals that an I/O exception has occurred. */ public void serialize(DataOutput out) throws IOException { out.writeInt(this.sizeinbits); out.writeInt(this.actualsizeinwords); for (int k = 0; k < this.actualsizeinwords; ++k) out.writeLong(this.buffer[k]); out.writeInt(this.rlw.position); } /** * Report the size required to serialize this bitmap * * @return the size in bytes */ public int serializedSizeInBytes() { return this.sizeInBytes() + 3 * 4; } /** * Query the value of a single bit. Relying on this method when speed is * needed is discouraged. The complexity is linear with the size of the * bitmap. * * (This implementation is based on zhenjl's Go version of JavaEWAH.) * * @param i * the bit we are interested in * @return whether the bit is set to true */ public boolean get(final int i) { if ((i < 0) || (i >= this.sizeinbits)) return false; int WordChecked = 0; final IteratingRLW j = getIteratingRLW(); final int wordi = i/wordinbits; while (WordChecked <= wordi ) { WordChecked += j.getRunningLength(); if (wordi < WordChecked) { return j.getRunningBit(); } if (wordi < WordChecked + j.getNumberOfLiteralWords()) { final long w = j.getLiteralWordAt(wordi - WordChecked); return (w & (1l << i)) != 0; } WordChecked += j.getNumberOfLiteralWords(); j.next(); } return false; } /** * Set the bit at position i to true, the bits must be set in (strictly) increasing * order. For example, set(15) and then set(7) will fail. You must do set(7) * and then set(15). 
*
   * @param i
   *          the index
   * @return true if the value was set (always true when i greater or equal to sizeInBits()).
   *         false is returned, and nothing is changed, when i is smaller than
   *         the current size in bits.
   * @throws IndexOutOfBoundsException
   *           if i is negative or greater than Integer.MAX_VALUE - 64
   */
  public boolean set(final int i) {
    if ((i > Integer.MAX_VALUE - wordinbits) || (i < 0))
      throw new IndexOutOfBoundsException("Set values should be between 0 and "
          + (Integer.MAX_VALUE - wordinbits));
    if (i < this.sizeinbits)
      return false;
    // distance in words:
    // (number of words needed to hold bit i) minus (number of words needed
    // to hold the current sizeinbits)
    final int dist = (i + wordinbits) / wordinbits
        - (this.sizeinbits + wordinbits - 1) / wordinbits;
    this.sizeinbits = i + 1;
    if (dist > 0) {// easy
      // The bit lands in a new word: pad with zero words, then add a literal
      // word carrying only that bit.
      if (dist > 1)
        fastaddStreamOfEmptyWords(false, dist - 1);
      addLiteralWord(1l << (i % wordinbits));
      return true;
    }
    // The bit lands in the current last word.
    if (this.rlw.getNumberOfLiteralWords() == 0) {
      // No literal follows the current marker, so the last word belongs to
      // its run: take one word off the run and re-add it as a literal.
      this.rlw.setRunningLength(this.rlw.getRunningLength() - 1);
      addLiteralWord(1l << (i % wordinbits));
      return true;
    }
    this.buffer[this.actualsizeinwords - 1] |= 1l << (i % wordinbits);
    // A literal word that became all ones is replaced by a clean word.
    if (this.buffer[this.actualsizeinwords - 1] == ~0l) {
      this.buffer[this.actualsizeinwords - 1] = 0;
      --this.actualsizeinwords;
      this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1);
      // next we add one clean word
      addEmptyWord(true);
    }
    return true;
  }

  /**
   * Set the size in bits. This does not change the compressed bitmap.
   *
   * The new size must need the same number of 64-bit words as the current
   * size; otherwise a RuntimeException is thrown.
   *
   * @since 0.4.0
   */
  @Override
  public void setSizeInBits(final int size) {
    if((size+EWAHCompressedBitmap.wordinbits-1)/EWAHCompressedBitmap.wordinbits!= (this.sizeinbits+EWAHCompressedBitmap.wordinbits-1)/EWAHCompressedBitmap.wordinbits)
      throw new RuntimeException("You can only reduce the size of the bitmap within the scope of the last word. To extend the bitmap, please call setSizeInbits(int,boolean).");
    this.sizeinbits = size;
  }

  /**
   * Change the reported size in bits of the *uncompressed* bitmap represented
   * by this compressed bitmap. It may change the underlying compressed bitmap.
   * It is not possible to reduce the sizeInBits, but
   * it can be extended.
The new bits are set to false or true depending on the * value of defaultvalue. * * @param size * the size in bits * @param defaultvalue * the default boolean value * @return true if the update was possible */ public boolean setSizeInBits(final int size, final boolean defaultvalue) { if (size < this.sizeinbits) return false; if (defaultvalue == false) extendEmptyBits(this, this.sizeinbits, size); else { // next bit could be optimized while (((this.sizeinbits % wordinbits) != 0) && (this.sizeinbits < size)) { this.set(this.sizeinbits); } this.addStreamOfEmptyWords(defaultvalue, (size / wordinbits) - this.sizeinbits / wordinbits); // next bit could be optimized while (this.sizeinbits < size) { this.set(this.sizeinbits); } } this.sizeinbits = size; return true; } /** * Returns the size in bits of the *uncompressed* bitmap represented by this * compressed bitmap. Initially, the sizeInBits is zero. It is extended * automatically when you set bits to true. * * @return the size in bits */ @Override public int sizeInBits() { return this.sizeinbits; } /** * Report the *compressed* size of the bitmap (equivalent to memory usage, * after accounting for some overhead). * * @return the size in bytes */ @Override public int sizeInBytes() { return this.actualsizeinwords * (wordinbits / 8); } /** * Populate an array of (sorted integers) corresponding to the location of the * set bits. 
* * @return the array containing the location of the set bits */ public int[] toArray() { int[] ans = new int[this.cardinality()]; int inanspos = 0; int pos = 0; final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord localrlw = i.next(); if (localrlw.getRunningBit()) { for (int j = 0; j < localrlw.getRunningLength(); ++j) { for (int c = 0; c < wordinbits; ++c) { ans[inanspos++] = pos++; } } } else { pos += wordinbits * localrlw.getRunningLength(); } for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { long data = i.buffer()[i.literalWords() + j]; if (!usetrailingzeros) { for (int c = 0; c < wordinbits; ++c) { if ((data & (1l << c)) != 0) ans[inanspos++] = c + pos; } pos += wordinbits; } else { while (data != 0) { final int ntz = Long.numberOfTrailingZeros(data); data ^= (1l << ntz); ans[inanspos++] = ntz + pos; } pos += wordinbits; } } } return ans; } /** * A more detailed string describing the bitmap (useful for debugging). * * @return the string */ public String toDebugString() { String ans = " EWAHCompressedBitmap, size in bits = " + this.sizeinbits + " size in words = " + this.actualsizeinwords + "\n"; final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord localrlw = i.next(); if (localrlw.getRunningBit()) { ans += localrlw.getRunningLength() + " 1x11\n"; } else { ans += localrlw.getRunningLength() + " 0x00\n"; } ans += localrlw.getNumberOfLiteralWords() + " dirties\n"; for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { long data = i.buffer()[i.literalWords() + j]; ans += "\t" + data + "\n"; } } return ans; } /** * A string describing the bitmap. 
*
   * The positions of the set bits are listed in increasing order, separated
   * by commas and enclosed in braces, e.g. {1,5,64}.
   *
   * @return the string
   */
  @Override
  public String toString() {
    // The builder never leaves this method, so the synchronized StringBuffer
    // is not needed.
    final StringBuilder answer = new StringBuilder();
    IntIterator i = this.intIterator();
    answer.append("{");
    if (i.hasNext())
      answer.append(i.next());
    while (i.hasNext()) {
      answer.append(",");
      answer.append(i.next());
    }
    answer.append("}");
    return answer.toString();
  }

  /**
   * swap the content of the bitmap with another. The buffer, the position of
   * the current running length word, the number of words in use and the size
   * in bits are exchanged.
   *
   * @param other bitmap to swap with
   */
  public void swap(final EWAHCompressedBitmap other) {
    long[] tmp = this.buffer;
    this.buffer = other.buffer;
    other.buffer = tmp;

    int tmp2 = this.rlw.position;
    this.rlw.position = other.rlw.position;
    other.rlw.position = tmp2;

    int tmp3 = this.actualsizeinwords;
    this.actualsizeinwords = other.actualsizeinwords;
    other.actualsizeinwords = tmp3;

    int tmp4 = this.sizeinbits;
    this.sizeinbits = other.sizeinbits;
    other.sizeinbits = tmp4;
  }

  /**
   * Reduce the internal buffer to its minimal allowable size (given
   * by this.actualsizeinwords). This can free memory.
   */
  public void trim() {
    this.buffer = Arrays.copyOf(this.buffer, this.actualsizeinwords);
  }

  /*
   * Delegates to serialize.
   *
   * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput)
   */
  @Override
  public void writeExternal(ObjectOutput out) throws IOException {
    serialize(out);
  }

  /**
   * Returns a new compressed bitmap containing the bitwise XOR values of the
   * current bitmap with some other bitmap.
   *
   * The running time is proportional to the sum of the compressed sizes (as
   * reported by sizeInBytes()).
   *
   * If you are not planning on adding to the resulting bitmap, you may call the trim()
   * method to reduce memory usage.
* * @param a * the other bitmap * @return the EWAH compressed bitmap */ @Override public EWAHCompressedBitmap xor(final EWAHCompressedBitmap a) { final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); container.reserve(this.actualsizeinwords + a.actualsizeinwords); xorToContainer(a, container); return container; } /** * Computes a new compressed bitmap containing the bitwise XOR values of the * current bitmap with some other bitmap. * * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * * @since 0.4.0 * @param a * the other bitmap * @param container * where we store the result */ public void xorToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) { final EWAHIterator i = a.getEWAHIterator(); final EWAHIterator j = getEWAHIterator(); final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); while ((rlwi.size()>0) && (rlwj.size()>0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { final boolean i_is_prey = rlwi.getRunningLength() < rlwj .getRunningLength(); final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj; final IteratingBufferedRunningLengthWord predator = i_is_prey ? 
rlwj : rlwi; if (predator.getRunningBit() == false) { long index = prey.discharge(container, predator.getRunningLength()); container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } else { long index = prey.dischargeNegated(container, predator.getRunningLength()); container.addStreamOfEmptyWords(true, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } } final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (int k = 0; k < nbre_literal; ++k) container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); rlwi.discardFirstWords(nbre_literal); rlwj.discardFirstWords(nbre_literal); } } final boolean i_remains = rlwi.size()>0; final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi : rlwj; remaining.discharge(container); container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); } /** * Returns the cardinality of the result of a bitwise XOR of the values of the * current bitmap with some other bitmap. Avoids needing to allocate an * intermediate bitmap to hold the result of the OR. * * @since 0.4.0 * @param a * the other bitmap * @return the cardinality */ public int xorCardinality(final EWAHCompressedBitmap a) { final BitCounter counter = new BitCounter(); xorToContainer(a, counter); return counter.getCount(); } /** * For internal use. Computes the bitwise and of the provided bitmaps and * stores the result in the container. * * @param container * where the result is stored * @param bitmaps * bitmaps to AND * @since 0.4.3 */ public static void andWithContainer(final BitmapStorage container, final EWAHCompressedBitmap... 
bitmaps) { if(bitmaps.length == 1) throw new IllegalArgumentException("Need at least one bitmap"); if(bitmaps.length == 2) { bitmaps[0].andToContainer(bitmaps[1],container); return; } EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); EWAHCompressedBitmap tmp = new EWAHCompressedBitmap(); bitmaps[0].andToContainer(bitmaps[1], answer); for(int k = 2; k < bitmaps.length - 1; ++k) { answer.andToContainer(bitmaps[k], tmp); tmp.swap(answer); tmp.clear(); } answer.andToContainer(bitmaps[bitmaps.length - 1], container); } /** * Returns a new compressed bitmap containing the bitwise AND values of the * provided bitmaps. * * It may or may not be faster than doing the aggregation two-by-two (A.and(B).and(C)). * * If only one bitmap is provided, it is returned as is. * * If you are not planning on adding to the resulting bitmap, you may call the trim() * method to reduce memory usage. * * @since 0.4.3 * @param bitmaps * bitmaps to AND together * @return result of the AND */ public static EWAHCompressedBitmap and(final EWAHCompressedBitmap... bitmaps) { if(bitmaps.length == 1) return bitmaps[0]; if(bitmaps.length == 2) return bitmaps[0].and(bitmaps[1]); EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); EWAHCompressedBitmap tmp = new EWAHCompressedBitmap(); bitmaps[0].andToContainer(bitmaps[1], answer); for(int k = 2; k < bitmaps.length; ++k) { answer.andToContainer(bitmaps[k], tmp); tmp.swap(answer); tmp.clear(); } return answer; } /** * Returns the cardinality of the result of a bitwise AND of the values of the * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold * the result of the AND. * * @since 0.4.3 * @param bitmaps * bitmaps to AND * @return the cardinality */ public static int andCardinality(final EWAHCompressedBitmap... 
bitmaps) { if(bitmaps.length == 1) return bitmaps[0].cardinality(); final BitCounter counter = new BitCounter(); andWithContainer(counter, bitmaps); return counter.getCount(); } /** * Return a bitmap with the bit set to true at the given * positions. The positions should be given in sorted order. * * (This is a convenience method.) * * @since 0.4.5 * @param setbits list of set bit positions * @return the bitmap */ public static EWAHCompressedBitmap bitmapOf(int ... setbits) { EWAHCompressedBitmap a = new EWAHCompressedBitmap(); for (int k : setbits) a.set(k); return a; } /** * For internal use. This simply adds a stream of words made of zeroes so that * we pad to the desired size. * * @param storage * bitmap to extend * @param currentSize * current size (in bits) * @param newSize * new desired size (in bits) * @since 0.4.3 */ private static void extendEmptyBits(final BitmapStorage storage, final int currentSize, final int newSize) { final int currentLeftover = currentSize % wordinbits; final int finalLeftover = newSize % wordinbits; storage.addStreamOfEmptyWords(false, (newSize / wordinbits) - currentSize / wordinbits + (finalLeftover != 0 ? 1 : 0) + (currentLeftover != 0 ? -1 : 0)); } /** * Uses an adaptive technique to compute the logical OR. * Mostly for internal use. * * @param container where the aggregate is written. * @param bitmaps to be aggregated */ public static void orWithContainer(final BitmapStorage container, final EWAHCompressedBitmap... bitmaps) { if (bitmaps.length < 2) throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); long size = 0L; long sinbits = 0L; for (EWAHCompressedBitmap b : bitmaps) { size += b.sizeInBytes(); if (sinbits < b.sizeInBits()) sinbits = b.sizeInBits(); } if (size * 8 > sinbits) { FastAggregation.bufferedorWithContainer(container, 65536, bitmaps); } else { FastAggregation.orToContainer(container, bitmaps); } } /** * Uses an adaptive technique to compute the logical XOR. 
* Mostly for internal use. * * @param container where the aggregate is written. * @param bitmaps to be aggregated */ public static void xorWithContainer(final BitmapStorage container, final EWAHCompressedBitmap... bitmaps) { if (bitmaps.length < 2) throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); long size = 0L; long sinbits = 0L; for (EWAHCompressedBitmap b : bitmaps) { size += b.sizeInBytes(); if (sinbits < b.sizeInBits()) sinbits = b.sizeInBits(); } if (size * 8 > sinbits) { FastAggregation.bufferedxorWithContainer(container, 65536, bitmaps); } else { FastAggregation.xorToContainer(container, bitmaps); } } /** * Returns a new compressed bitmap containing the bitwise OR values of the * provided bitmaps. This is typically faster than doing the aggregation * two-by-two (A.or(B).or(C).or(D)). * * If only one bitmap is provided, it is returned as is. * * If you are not planning on adding to the resulting bitmap, you may call the trim() * method to reduce memory usage. * * @since 0.4.0 * @param bitmaps * bitmaps to OR together * @return result of the OR */ public static EWAHCompressedBitmap or(final EWAHCompressedBitmap... bitmaps) { if(bitmaps.length == 1) return bitmaps[0]; final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); int largestSize = 0; for (EWAHCompressedBitmap bitmap : bitmaps) { largestSize = Math.max(bitmap.actualsizeinwords, largestSize); } container.reserve((int) (largestSize * 1.5)); orWithContainer(container, bitmaps); return container; } /** * Returns a new compressed bitmap containing the bitwise XOR values of the * provided bitmaps. This is typically faster than doing the aggregation * two-by-two (A.xor(B).xor(C).xor(D)). * * If only one bitmap is provided, it is returned as is. * * If you are not planning on adding to the resulting bitmap, you may call the trim() * method to reduce memory usage. 
* * @param bitmaps * bitmaps to XOR together * @return result of the XOR */ public static EWAHCompressedBitmap xor(final EWAHCompressedBitmap... bitmaps) { if(bitmaps.length == 1) return bitmaps[0]; final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); int largestSize = 0; for (EWAHCompressedBitmap bitmap : bitmaps) { largestSize = Math.max(bitmap.actualsizeinwords, largestSize); } container.reserve((int) (largestSize * 1.5)); xorWithContainer(container, bitmaps); return container; } /** * Returns the cardinality of the result of a bitwise OR of the values of the * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold * the result of the OR. * * @since 0.4.0 * @param bitmaps * bitmaps to OR * @return the cardinality */ public static int orCardinality(final EWAHCompressedBitmap... bitmaps) { if(bitmaps.length == 1) return bitmaps[0].cardinality(); final BitCounter counter = new BitCounter(); orWithContainer(counter, bitmaps); return counter.getCount(); } /** The actual size in words. */ int actualsizeinwords = 1; /** The buffer (array of 64-bit words) */ long buffer[] = null; /** The current (last) running length word. */ RunningLengthWord rlw = null; /** sizeinbits: number of bits in the (uncompressed) bitmap. */ int sizeinbits = 0; /** * The Constant defaultbuffersize: default memory allocation when the object * is constructed. */ static final int defaultbuffersize = 4; /** optimization option **/ public static final boolean usetrailingzeros = true; /** whether we adjust after some aggregation by adding in zeroes **/ public static final boolean adjustContainerSizeWhenAggregating = true; /** The Constant wordinbits represents the number of bits in a long. 
*/
  public static final int wordinbits = 64;

}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/EWAHIterator.java000066400000000000000000000044501224043567000270570ustar00rootroot00000000000000package com.googlecode.javaewah;

/*
 * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
 * Licensed under the Apache License, Version 2.0.
 */

/**
 * The class EWAHIterator represents a special type of
 * efficient iterator iterating over (uncompressed) words of bits.
 * It is not meant for end users.
 *
 * The same RunningLengthWord instance is repositioned and returned by every
 * call to next().
 *
 * @author Daniel Lemire
 * @since 0.1.0
 *
 */
public final class EWAHIterator implements Cloneable {

  /**
   * Instantiates a new EWAH iterator, positioned at the first word.
   *
   * @param a the bitmap whose array of words is iterated over
   * @param sizeinwords the number of words that are significant in the array of words
   */
  public EWAHIterator(final EWAHCompressedBitmap a, final int sizeinwords) {
    this.rlw = new RunningLengthWord(a, 0);
    this.size = sizeinwords;
    this.pointer = 0;
  }

  /**
   * Allow expert developers to instantiate an EWAHIterator.
   *
   * @param bitmap we want to iterate over
   * @return an iterator
   */
  public static EWAHIterator getEWAHIterator(EWAHCompressedBitmap bitmap) {
    return bitmap.getEWAHIterator();
  }

  /**
   * Access to the array of words
   *
   * @return the long[]
   */
  public long[] buffer() {
    return this.rlw.parent.buffer;
  }

  /**
   * Position of the literal words represented by this running length word.
   * After a call to next(), this is the index just after the running length
   * word that was returned.
   *
   * @return the index of the first literal word
   */
  public int literalWords() {
    return this.pointer - this.rlw.getNumberOfLiteralWords();
  }

  /**
   * Checks for next.
   *
   * @return true if the pointer has not yet reached the number of
   *         significant words
   */
  public boolean hasNext() {
    return this.pointer < this.size;
  }

  /**
   * Next running length word. The pointer is advanced past the running
   * length word and the literal words attached to it.
   *
   * @return the running length word
   */
  public RunningLengthWord next() {
    this.rlw.position = this.pointer;
    this.pointer += this.rlw.getNumberOfLiteralWords() + 1;
    return this.rlw;
  }

  /**
   * Creates a copy with its own clone of the running length word and the
   * same size and pointer.
   */
  @Override
  public EWAHIterator clone() throws CloneNotSupportedException {
    EWAHIterator ans = (EWAHIterator) super.clone();
    ans.rlw = this.rlw.clone();
    ans.size = this.size;
    ans.pointer = this.pointer;
    return ans;
  }

  /** The pointer represent the location of the current running length
   *  word in the array of words (embedded in the rlw attribute). */
  int pointer;

  /** The current running length word. */
  RunningLengthWord rlw;

  /** The size in words. */
  int size;

}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/FastAggregation.java000066400000000000000000000333761224043567000276770ustar00rootroot00000000000000package com.googlecode.javaewah;

import java.util.Arrays;
import java.util.Comparator;
import java.util.PriorityQueue;

/*
 * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
 * Licensed under the Apache License, Version 2.0.
 */
/**
 * Fast algorithms to aggregate many bitmaps. These algorithms are just given as
 * reference. They may not be faster than the corresponding methods in the
 * EWAHCompressedBitmap class.
 *
 * @author Daniel Lemire
 *
 */
public class FastAggregation {

  /**
   * Compute the and aggregate using a temporary uncompressed bitmap.
   * @param bitmaps the source bitmaps
   * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap)
   * @return the and aggregate.
   */
  public static EWAHCompressedBitmap bufferedand(final int bufsize,
    final EWAHCompressedBitmap... bitmaps) {
    EWAHCompressedBitmap answer = new EWAHCompressedBitmap();
    bufferedandWithContainer(answer,bufsize, bitmaps);
    return answer;
  }

  /**
   * Compute the and aggregate using a temporary uncompressed bitmap.
* * @param container where the aggregate is written * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) * @param bitmaps the source bitmaps */ public static void bufferedandWithContainer(final BitmapStorage container,final int bufsize, final EWAHCompressedBitmap... bitmaps) { java.util.LinkedList al = new java.util.LinkedList(); for (EWAHCompressedBitmap bitmap : bitmaps) { al.add(new IteratingBufferedRunningLengthWord(bitmap)); } long[] hardbitmap = new long[bufsize*bitmaps.length]; for(IteratingRLW i : al) if (i.size() == 0) { al.clear(); break; } while (!al.isEmpty()) { Arrays.fill(hardbitmap, ~0l); long effective = Integer.MAX_VALUE; for(IteratingRLW i : al) { int eff = IteratorAggregation.inplaceand(hardbitmap, i); if (eff < effective) effective = eff; } for (int k = 0; k < effective; ++k) container.add(hardbitmap[k]); for(IteratingRLW i : al) if (i.size() == 0) { al.clear(); break; } } } /** * Compute the or aggregate using a temporary uncompressed bitmap. * @param bitmaps the source bitmaps * @param bufsize buffer size used during the computation in 64-bit words * @return the or aggregate. */ public static EWAHCompressedBitmap bufferedor(final int bufsize, final EWAHCompressedBitmap... bitmaps) { EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); bufferedorWithContainer(answer, bufsize, bitmaps); return answer; } /** * Compute the or aggregate using a temporary uncompressed bitmap. * * @param container where the aggregate is written * @param bufsize buffer size used during the computation in 64-bit words * @param bitmaps the source bitmaps */ public static void bufferedorWithContainer(final BitmapStorage container, final int bufsize, final EWAHCompressedBitmap... 
bitmaps) { int range = 0; EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); Arrays.sort(sbitmaps, new Comparator() { @Override public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { return b.sizeinbits - a.sizeinbits; } }); java.util.ArrayList al = new java.util.ArrayList(); for (EWAHCompressedBitmap bitmap : sbitmaps) { if (bitmap.sizeinbits > range) range = bitmap.sizeinbits; al.add(new IteratingBufferedRunningLengthWord(bitmap)); } long[] hardbitmap = new long[bufsize]; int maxr = al.size(); while (maxr > 0) { long effective = 0; for (int k = 0; k < maxr; ++k) { if (al.get(k).size() > 0) { int eff = IteratorAggregation.inplaceor(hardbitmap, al.get(k)); if (eff > effective) effective = eff; } else maxr = k; } for (int k = 0; k < effective; ++k) container.add(hardbitmap[k]); Arrays.fill(hardbitmap, 0); } container.setSizeInBits(range); } /** * Compute the xor aggregate using a temporary uncompressed bitmap. * @param bitmaps the source bitmaps * @param bufsize buffer size used during the computation in 64-bit words * @return the xor aggregate. */ public static EWAHCompressedBitmap bufferedxor(final int bufsize, final EWAHCompressedBitmap... bitmaps) { EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); bufferedxorWithContainer(answer, bufsize,bitmaps); return answer; } /** * Compute the xor aggregate using a temporary uncompressed bitmap. * * @param container where the aggregate is written * @param bufsize buffer size used during the computation in 64-bit words * @param bitmaps the source bitmaps */ public static void bufferedxorWithContainer(final BitmapStorage container, final int bufsize, final EWAHCompressedBitmap... 
bitmaps) { int range = 0; EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); Arrays.sort(sbitmaps, new Comparator() { @Override public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { return b.sizeinbits - a.sizeinbits; } }); java.util.ArrayList al = new java.util.ArrayList(); for (EWAHCompressedBitmap bitmap : sbitmaps) { if (bitmap.sizeinbits > range) range = bitmap.sizeinbits; al.add(new IteratingBufferedRunningLengthWord(bitmap)); } long[] hardbitmap = new long[bufsize]; int maxr = al.size(); while (maxr > 0) { long effective = 0; for (int k = 0; k < maxr; ++k) { if (al.get(k).size() > 0) { int eff = IteratorAggregation.inplacexor(hardbitmap, al.get(k)); if (eff > effective) effective = eff; } else maxr = k; } for (int k = 0; k < effective; ++k) container.add(hardbitmap[k]); Arrays.fill(hardbitmap, 0); } container.setSizeInBits(range); } /** * Uses a priority queue to compute the or aggregate. * @param a class extending LogicalElement (like a compressed bitmap) * @param bitmaps * bitmaps to be aggregated * @return the or aggregate */ @SuppressWarnings({ "rawtypes", "unchecked" }) public static T or(T... bitmaps) { PriorityQueue pq = new PriorityQueue(bitmaps.length, new Comparator() { @Override public int compare(T a, T b) { return a.sizeInBytes() - b.sizeInBytes(); } }); for (T x : bitmaps) { pq.add(x); } while (pq.size() > 1) { T x1 = pq.poll(); T x2 = pq.poll(); pq.add((T) x1.or(x2)); } return pq.poll(); } /** * Uses a priority queue to compute the or aggregate. * @param container where we write the result * @param bitmaps to be aggregated */ public static void orToContainer(final BitmapStorage container, final EWAHCompressedBitmap ... 
bitmaps) { if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); PriorityQueue pq = new PriorityQueue(bitmaps.length, new Comparator() { @Override public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { return a.sizeInBytes() - b.sizeInBytes(); } }); for (EWAHCompressedBitmap x : bitmaps) { pq.add(x); } while (pq.size() > 2) { EWAHCompressedBitmap x1 = pq.poll(); EWAHCompressedBitmap x2 = pq.poll(); pq.add(x1.or(x2)); } pq.poll().orToContainer(pq.poll(), container); } /** * Uses a priority queue to compute the xor aggregate. * * @param a class extending LogicalElement (like a compressed bitmap) * @param bitmaps * bitmaps to be aggregated * @return the xor aggregate */ @SuppressWarnings({ "rawtypes", "unchecked" }) public static T xor(T... bitmaps) { PriorityQueue pq = new PriorityQueue(bitmaps.length, new Comparator() { @Override public int compare(T a, T b) { return a.sizeInBytes() - b.sizeInBytes(); } }); for (T x : bitmaps) pq.add(x); while (pq.size() > 1) { T x1 = pq.poll(); T x2 = pq.poll(); pq.add((T) x1.xor(x2)); } return pq.poll(); } /** * Uses a priority queue to compute the xor aggregate. * @param container where we write the result * @param bitmaps to be aggregated */ public static void xorToContainer(final BitmapStorage container, final EWAHCompressedBitmap ... bitmaps) { if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); PriorityQueue pq = new PriorityQueue(bitmaps.length, new Comparator() { @Override public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { return a.sizeInBytes() - b.sizeInBytes(); } }); for (EWAHCompressedBitmap x : bitmaps) { pq.add(x); } while (pq.size() > 2) { EWAHCompressedBitmap x1 = pq.poll(); EWAHCompressedBitmap x2 = pq.poll(); pq.add(x1.xor(x2)); } pq.poll().xorToContainer(pq.poll(), container); } /** * For internal use. Computes the bitwise or of the provided bitmaps and * stores the result in the container. 
(This used to be the default.) * * @deprecated use EWAHCompressedBitmap.or instead * @since 0.4.0 * @param container where store the result * @param bitmaps to be aggregated */ @Deprecated public static void legacy_orWithContainer(final BitmapStorage container, final EWAHCompressedBitmap... bitmaps) { if (bitmaps.length == 2) { // should be more efficient bitmaps[0].orToContainer(bitmaps[1], container); return; } // Sort the bitmaps in descending order by sizeinbits. We will exhaust the // sorted bitmaps from right to left. final EWAHCompressedBitmap[] sortedBitmaps = bitmaps.clone(); Arrays.sort(sortedBitmaps, new Comparator() { @Override public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { return a.sizeinbits < b.sizeinbits ? 1 : a.sizeinbits == b.sizeinbits ? 0 : -1; } }); final IteratingBufferedRunningLengthWord[] rlws = new IteratingBufferedRunningLengthWord[bitmaps.length]; int maxAvailablePos = 0; for (EWAHCompressedBitmap bitmap : sortedBitmaps) { EWAHIterator iterator = bitmap.getEWAHIterator(); if (iterator.hasNext()) { rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord( iterator); } } if (maxAvailablePos == 0) { // this never happens... 
container.setSizeInBits(0); return; } int maxSize = sortedBitmaps[0].sizeinbits; while (true) { long maxOneRl = 0; long minZeroRl = Long.MAX_VALUE; long minSize = Long.MAX_VALUE; int numEmptyRl = 0; for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord rlw = rlws[i]; long size = rlw.size(); if (size == 0) { maxAvailablePos = i; break; } minSize = Math.min(minSize, size); if (rlw.getRunningBit()) { long rl = rlw.getRunningLength(); maxOneRl = Math.max(maxOneRl, rl); minZeroRl = 0; if (rl == 0 && size > 0) { numEmptyRl++; } } else { long rl = rlw.getRunningLength(); minZeroRl = Math.min(minZeroRl, rl); if (rl == 0 && size > 0) { numEmptyRl++; } } } if (maxAvailablePos == 0) { break; } else if (maxAvailablePos == 1) { // only one bitmap is left so just write the rest of it out rlws[0].discharge(container); break; } if (maxOneRl > 0) { container.addStreamOfEmptyWords(true, maxOneRl); for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord rlw = rlws[i]; rlw.discardFirstWords(maxOneRl); } } else if (minZeroRl > 0) { container.addStreamOfEmptyWords(false, minZeroRl); for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord rlw = rlws[i]; rlw.discardFirstWords(minZeroRl); } } else { int index = 0; if (numEmptyRl == 1) { // if one rlw has literal words to process and the rest have a run of // 0's we can write them out here IteratingBufferedRunningLengthWord emptyRl = null; long minNonEmptyRl = Long.MAX_VALUE; for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord rlw = rlws[i]; long rl = rlw.getRunningLength(); if (rl == 0) { assert emptyRl == null; emptyRl = rlw; } else { minNonEmptyRl = Math.min(minNonEmptyRl, rl); } } long wordsToWrite = minNonEmptyRl > minSize ? 
minSize : minNonEmptyRl; if (emptyRl != null) emptyRl.writeLiteralWords((int) wordsToWrite, container); index += wordsToWrite; } while (index < minSize) { long word = 0; for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord rlw = rlws[i]; if (rlw.getRunningLength() <= index) { word |= rlw.getLiteralWordAt(index - (int) rlw.getRunningLength()); } } container.add(word); index++; } for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord rlw = rlws[i]; rlw.discardFirstWords(minSize); } } } container.setSizeInBits(maxSize); } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IntIterator.java000066400000000000000000000011271224043567000270630ustar00rootroot00000000000000package com.googlecode.javaewah; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * * The IntIterator interface is used to iterate over a stream of integers. * * @author Daniel Lemire * @since 0.2.0 * */ public interface IntIterator { /** * Is there more? * * @return true, if there is more, false otherwise */ public boolean hasNext(); /** * Return the next integer * * @return the integer */ public int next(); } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorImpl.java000066400000000000000000000043361224043567000277120ustar00rootroot00000000000000package com.googlecode.javaewah; /* * Copyright 2012, Google Inc. * Licensed under the Apache License, Version 2.0. */ import static com.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; /** * The IntIteratorImpl is the 64 bit implementation of the * IntIterator interface, which efficiently returns the stream of integers * represented by an EWAHIterator. 
* * @author Colby Ranger * @since 0.5.6 */ final class IntIteratorImpl implements IntIterator { private final EWAHIterator ewahIter; private final long[] ewahBuffer; private int position; private int runningLength; private long word; private int wordPosition; private int wordLength; private int literalPosition; private boolean hasnext; IntIteratorImpl(EWAHIterator ewahIter) { this.ewahIter = ewahIter; this.ewahBuffer = ewahIter.buffer(); this.hasnext = this.moveToNext(); } public final boolean moveToNext() { while (!runningHasNext() && !literalHasNext()) { if (!this.ewahIter.hasNext()) { return false; } setRunningLengthWord(this.ewahIter.next()); } return true; } @Override public boolean hasNext() { return this.hasnext; } @Override public final int next() { final int answer; if (runningHasNext()) { answer = this.position++; } else { final int bit = Long.numberOfTrailingZeros(this.word); this.word ^= (1l << bit); answer = this.literalPosition + bit; } this.hasnext = this.moveToNext(); return answer; } private final void setRunningLengthWord(RunningLengthWord rlw) { this.runningLength = wordinbits * (int) rlw.getRunningLength() + this.position; if (!rlw.getRunningBit()) { this.position = this.runningLength; } this.wordPosition = this.ewahIter.literalWords(); this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); } private final boolean runningHasNext() { return this.position < this.runningLength; } private final boolean literalHasNext() { while (this.word == 0 && this.wordPosition < this.wordLength) { this.word = this.ewahBuffer[this.wordPosition++]; this.literalPosition = this.position; this.position += wordinbits; } return this.word != 0; } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorOverIteratingRLW.java000066400000000000000000000044421224043567000321560ustar00rootroot00000000000000package com.googlecode.javaewah; import static com.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; /* * Copyright 2009-2013, Daniel 
Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
 * Licensed under the Apache License, Version 2.0.
 */
/**
 * Implementation of an IntIterator over an IteratingRLW.
 * Values come from runs of set bits and from the set bits of literal words.
 */
public class IntIteratorOverIteratingRLW implements IntIterator {
  IteratingRLW parent;
  private int position;
  private int runningLength;
  private long word;
  private int wordPosition;
  private int wordLength;
  private int literalPosition;
  private boolean hasnext;

  /**
   * @param p iterator we wish to iterate over
   */
  public IntIteratorOverIteratingRLW(final IteratingRLW p) {
    this.parent = p;
    this.position = 0;
    setupForCurrentRunningLengthWord();
    this.hasnext = moveToNext();
  }

  /**
   * @return whether we could find another set bit; don't move if there is an unprocessed value
   */
  private final boolean moveToNext() {
    // literalHasNext() has side effects and must only run when the run is used up.
    while (!(runningHasNext() || literalHasNext())) {
      if (!this.parent.next()) {
        return false;
      }
      setupForCurrentRunningLengthWord();
    }
    return true;
  }

  @Override
  public boolean hasNext() {
    return this.hasnext;
  }

  @Override
  public final int next() {
    final int answer;
    if (!runningHasNext()) {
      // Take the lowest set bit of the current literal word and clear it.
      final int bit = Long.numberOfTrailingZeros(this.word);
      this.word ^= (1L << bit);
      answer = this.literalPosition + bit;
    } else {
      answer = this.position++;
    }
    this.hasnext = this.moveToNext();
    return answer;
  }

  /** Updates the run and literal-word bounds from the parent's current word. */
  private final void setupForCurrentRunningLengthWord() {
    final int runWords = (int) this.parent.getRunningLength();
    this.runningLength = wordinbits * runWords + this.position;
    if (!this.parent.getRunningBit()) {
      // A run of zeros yields no values: jump straight past it.
      this.position = this.runningLength;
    }
    this.wordPosition = 0;
    this.wordLength = this.parent.getNumberOfLiteralWords();
  }

  /** @return whether the current run still has positions to report */
  private final boolean runningHasNext() {
    return this.runningLength > this.position;
  }

  /** Loads literal words until one is non-zero or none are left. */
  private final boolean literalHasNext() {
    while (this.word == 0 && this.wordPosition < this.wordLength) {
      this.word = this.parent.getLiteralWordAt(this.wordPosition++);
      this.literalPosition = this.position;
      this.position += wordinbits;
    }
    return this.word != 0;
  }
}
IteratingBufferedRunningLengthWord.java000066400000000000000000000200511224043567000334650ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewahpackage com.googlecode.javaewah; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. Similar to BufferedRunningLengthWord, but automatically * advances to the next BufferedRunningLengthWord as words are discarded. * * @since 0.4.0 * @author David McIntosh */ public final class IteratingBufferedRunningLengthWord implements IteratingRLW, Cloneable{ /** * Instantiates a new iterating buffered running length word. * * @param iterator iterator */ public IteratingBufferedRunningLengthWord(final EWAHIterator iterator) { this.iterator = iterator; this.brlw = new BufferedRunningLengthWord(this.iterator.next()); this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; this.buffer = this.iterator.buffer(); } /** * Instantiates a new iterating buffered running length word. * @param bitmap over which we want to iterate * */ public IteratingBufferedRunningLengthWord(final EWAHCompressedBitmap bitmap) { this.iterator = EWAHIterator.getEWAHIterator(bitmap); this.brlw = new BufferedRunningLengthWord(this.iterator.next()); this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; this.buffer = this.iterator.buffer(); } /** * Discard first words, iterating to the next running length word if needed. * * @param x the number of words to be discarded */ @Override public void discardFirstWords(long x) { while (x > 0) { if (this.brlw.RunningLength > x) { this.brlw.RunningLength -= x; return; } x -= this.brlw.RunningLength; this.brlw.RunningLength = 0; long toDiscard = x > this.brlw.NumberOfLiteralWords ? 
this.brlw.NumberOfLiteralWords : x; this.literalWordStartPosition += toDiscard; this.brlw.NumberOfLiteralWords -= toDiscard; x -= toDiscard; if ((x > 0) || (this.brlw.size() == 0)) { if (!this.iterator.hasNext()) { break; } this.brlw.reset(this.iterator.next()); this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 } } } /** * Move to the next RunningLengthWord * @return whether the move was possible */ @Override public boolean next() { if (!this.iterator.hasNext()) { this.brlw.NumberOfLiteralWords = 0; this.brlw.RunningLength = 0; return false; } this.brlw.reset(this.iterator.next()); this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 return true; } /** * Write out up to max words, returns how many were written * @param container target for writes * @param max maximal number of writes * @return how many written */ public long discharge(BitmapStorage container, long max) { long index = 0; while ((index < max) && (size() > 0)) { // first run long pl = getRunningLength(); if (index + pl > max) { pl = max - index; } container.addStreamOfEmptyWords(getRunningBit(), pl); index += pl; int pd = getNumberOfLiteralWords(); if (pd + index > max) { pd = (int) (max - index); } writeLiteralWords(pd, container); discardFirstWords(pl+pd); index += pd; } return index; } /** * Write out up to max words (negated), returns how many were written * @param container target for writes * @param max maximal number of writes * @return how many written */ public long dischargeNegated(BitmapStorage container, long max) { long index = 0; while ((index < max) && (size() > 0)) { // first run long pl = getRunningLength(); if (index + pl > max) { pl = max - index; } container.addStreamOfEmptyWords(!getRunningBit(), pl); index += pl; int pd = getNumberOfLiteralWords(); if (pd + index > max) { pd = (int) (max - index); } writeNegatedLiteralWords(pd, container); discardFirstWords(pl+pd); index += pd; } return 
index; } /** * Write out the remain words, transforming them to zeroes. * @param container target for writes */ public void dischargeAsEmpty(BitmapStorage container) { while(size()>0) { container.addStreamOfEmptyWords(false, size()); discardFirstWords(size()); } } /** * Write out the remaining words * @param container target for writes */ public void discharge(BitmapStorage container) { this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords(); discharge(this.brlw, this.iterator, container); } /** * Get the nth literal word for the current running length word * @param index zero based index * @return the literal word */ @Override public long getLiteralWordAt(int index) { return this.buffer[this.literalWordStartPosition + index]; } /** * Gets the number of literal words for the current running length word. * * @return the number of literal words */ @Override public int getNumberOfLiteralWords() { return this.brlw.NumberOfLiteralWords; } /** * Gets the running bit. * * @return the running bit */ @Override public boolean getRunningBit() { return this.brlw.RunningBit; } /** * Gets the running length. * * @return the running length */ @Override public long getRunningLength() { return this.brlw.RunningLength; } /** * Size in uncompressed words of the current running length word. * * @return the long */ @Override public long size() { return this.brlw.size(); } /** * write the first N literal words to the target bitmap. Does not discard the words or perform iteration. * @param numWords number of words to be written * @param container where we write */ public void writeLiteralWords(int numWords, BitmapStorage container) { container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); } /** * write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. 
* @param numWords number of words to be written * @param container where we write */ public void writeNegatedLiteralWords(int numWords, BitmapStorage container) { container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); } /** * For internal use. (One could use the non-static discharge method instead, * but we expect them to be slower.) * * @param initialWord * the initial word * @param iterator * the iterator * @param container * the container */ private static void discharge(final BufferedRunningLengthWord initialWord, final EWAHIterator iterator, final BitmapStorage container) { BufferedRunningLengthWord runningLengthWord = initialWord; for (;;) { final long runningLength = runningLengthWord.getRunningLength(); container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(), runningLength); container.addStreamOfLiteralWords(iterator.buffer(), iterator.literalWords() + runningLengthWord.literalwordoffset, runningLengthWord.getNumberOfLiteralWords()); if (!iterator.hasNext()) break; runningLengthWord = new BufferedRunningLengthWord(iterator.next()); } } @Override public IteratingBufferedRunningLengthWord clone() throws CloneNotSupportedException { IteratingBufferedRunningLengthWord answer = (IteratingBufferedRunningLengthWord) super.clone(); answer.brlw = this.brlw.clone(); answer.buffer = this.buffer; answer.iterator = this.iterator.clone(); answer.literalWordStartPosition = this.literalWordStartPosition; return answer; } private BufferedRunningLengthWord brlw; private long[] buffer; private int literalWordStartPosition; private EWAHIterator iterator; } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IteratingRLW.java000066400000000000000000000022651224043567000271360ustar00rootroot00000000000000package com.googlecode.javaewah; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. 
*/
/**
 * High-level iterator over a compressed bitmap.
 * At any time it exposes one running length word: a run of fill words
 * followed by literal words.
 *
 */
public interface IteratingRLW {
 /**
 * Move to the next running length word.
 *
 * @return whether there is more
 */
 public boolean next() ;
 /**
 * @param index where the literal word is
 * @return the literal word at the given index.
 */
 public long getLiteralWordAt(int index);
 /**
 * @return the number of literal (non-fill) words
 */
 public int getNumberOfLiteralWords() ;
 /**
 * @return the bit used for the fill bits
 */
 public boolean getRunningBit() ;
 /**
 * @return sum of getRunningLength() and getNumberOfLiteralWords()
 */
 public long size() ;
 /**
 * @return length of the run of fill words
 */
 public long getRunningLength() ;
 /**
 * @param x the number of words to discard
 */
 public void discardFirstWords(long x);
 /**
 * @return a copy of the iterator
 * @throws CloneNotSupportedException this should not be thrown in theory
 */
 public IteratingRLW clone() throws CloneNotSupportedException;
 }
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IteratorAggregation.java000066400000000000000000000433541224043567000305700ustar00rootroot00000000000000package com.googlecode.javaewah;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.LinkedList;
 /*
 * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
 * Licensed under the Apache License, Version 2.0.
 */
 /**
 * Set of helper functions to aggregate bitmaps.
 *
 */
 public class IteratorAggregation {
 /**
 * Wraps an iterator in a view that complements its literal words and
 * flips its fill bit; every other call is forwarded unchanged.
 *
 * @param x iterator to negate
 * @return negated version of the iterator
 */
 public static IteratingRLW not(final IteratingRLW x) {
 return new IteratingRLW() {
 @Override
 public boolean next() {
 return x.next();
 }
 @Override
 public long getLiteralWordAt(int index) {
 // literal words are complemented bit by bit
 return ~x.getLiteralWordAt(index);
 }
 @Override
 public int getNumberOfLiteralWords() {
 return x.getNumberOfLiteralWords();
 }
 @Override
 public boolean getRunningBit() {
 // the fill bit is inverted
 return !
x.getRunningBit(); } @Override public long size() { return x.size(); } @Override public long getRunningLength() { return x.getRunningLength(); } @Override public void discardFirstWords(long y) { x.discardFirstWords(y); } @Override public IteratingRLW clone() throws CloneNotSupportedException { throw new CloneNotSupportedException(); } }; } /** * Aggregate the iterators using a bitmap buffer. * * @param al set of iterators to aggregate * @return and aggregate */ public static IteratingRLW bufferedand(final IteratingRLW... al) { return bufferedand(DEFAULTMAXBUFSIZE,al); } /** * Aggregate the iterators using a bitmap buffer. * * @param al set of iterators to aggregate * @param bufsize size of the internal buffer used by the iterator in 64-bit words (per input iterator) * @return and aggregate */ public static IteratingRLW bufferedand(final int bufsize, final IteratingRLW... al) { if (al.length == 0) throw new IllegalArgumentException("Need at least one iterator"); if (al.length == 1) return al[0]; final LinkedList basell = new LinkedList(); for (IteratingRLW i : al) basell.add(i); return new BufferedIterator(new BufferedAndIterator(basell,bufsize)); } /** * Aggregate the iterators using a bitmap buffer. * * @param al set of iterators to aggregate * @return or aggregate */ public static IteratingRLW bufferedor(final IteratingRLW... al) { return bufferedor(DEFAULTMAXBUFSIZE,al); } /** * Aggregate the iterators using a bitmap buffer. * * @param al iterators to aggregate * @param bufsize size of the internal buffer used by the iterator in 64-bit words * @return or aggregate */ public static IteratingRLW bufferedor(final int bufsize, final IteratingRLW... 
al) { if (al.length == 0) throw new IllegalArgumentException("Need at least one iterator"); if (al.length == 1) return al[0]; final LinkedList basell = new LinkedList(); for (IteratingRLW i : al) basell.add(i); return new BufferedIterator(new BufferedORIterator(basell,bufsize)); } /** * Aggregate the iterators using a bitmap buffer. * * @param al set of iterators to aggregate * @return xor aggregate */ public static IteratingRLW bufferedxor(final IteratingRLW... al) { return bufferedxor(DEFAULTMAXBUFSIZE,al); } /** * Aggregate the iterators using a bitmap buffer. * * @param al iterators to aggregate * @param bufsize size of the internal buffer used by the iterator in 64-bit words * @return xor aggregate */ public static IteratingRLW bufferedxor(final int bufsize, final IteratingRLW... al) { if (al.length == 0) throw new IllegalArgumentException("Need at least one iterator"); if (al.length == 1) return al[0]; final LinkedList basell = new LinkedList(); for (IteratingRLW i : al) basell.add(i); return new BufferedIterator(new BufferedXORIterator(basell, bufsize)); } /** * Write out the content of the iterator, but as if it were all zeros. 
* * @param container * where we write * @param i * the iterator */ protected static void dischargeAsEmpty(final BitmapStorage container, final IteratingRLW i) { while (i.size() > 0) { container.addStreamOfEmptyWords(false, i.size()); i.next(); } } /** * Write out up to max words, returns how many were written * @param container target for writes * @param i source of data * @param max maximal number of writes * @return how many written */ protected static long discharge(final BitmapStorage container, IteratingRLW i, long max) { long counter = 0; while (i.size() > 0 && counter < max) { long L1 = i.getRunningLength(); if (L1 > 0) { if (L1 + counter > max) L1 = max - counter; container.addStreamOfEmptyWords(i.getRunningBit(), L1); counter += L1; } long L = i.getNumberOfLiteralWords(); if(L + counter > max) L = max - counter; for (int k = 0; k < L; ++k) { container.add(i.getLiteralWordAt(k)); } counter += L; i.discardFirstWords(L+L1); } return counter; } /** * Write out up to max negated words, returns how many were written * @param container target for writes * @param i source of data * @param max maximal number of writes * @return how many written */ protected static long dischargeNegated(final BitmapStorage container, IteratingRLW i, long max) { long counter = 0; while (i.size() > 0 && counter < max) { long L1 = i.getRunningLength(); if (L1 > 0) { if (L1 + counter > max) L1 = max - counter; container.addStreamOfEmptyWords(!i.getRunningBit(), L1); counter += L1; } long L = i.getNumberOfLiteralWords(); if(L + counter > max) L = max - counter; for (int k = 0; k < L; ++k) { container.add(~i.getLiteralWordAt(k)); } counter += L; i.discardFirstWords(L+L1); } return counter; } static void andToContainer(final BitmapStorage container, int desiredrlwcount, final IteratingRLW rlwi, IteratingRLW rlwj) { while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { final boolean i_is_prey = 
rlwi.getRunningLength() < rlwj .getRunningLength();
 // The "predator" is the side with the longer run; the "prey" is the other.
 final IteratingRLW prey = i_is_prey ? rlwi : rlwj;
 final IteratingRLW predator = i_is_prey ? rlwj : rlwi;
 if (predator.getRunningBit() == false) {
 // A run of zeros: the and is zero over the whole run.
 container.addStreamOfEmptyWords(false, predator.getRunningLength());
 prey.discardFirstWords(predator.getRunningLength());
 predator.discardFirstWords(predator.getRunningLength());
 } else {
 // A run of ones: the prey is copied through for the length of the run,
 // then padded with zeros if it ran out first.
 final long index = discharge(container, prey, predator.getRunningLength());
 container.addStreamOfEmptyWords(false, predator.getRunningLength() - index);
 predator.discardFirstWords(predator.getRunningLength());
 }
 }
 // Both sides now start with literal words: and them pairwise.
 final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords());
 if (nbre_literal > 0) {
 desiredrlwcount -= nbre_literal;
 for (int k = 0; k < nbre_literal; ++k)
 container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
 rlwi.discardFirstWords(nbre_literal);
 rlwj.discardFirstWords(nbre_literal);
 }
 }
 }
 /**
 * Writes the and of the two iterators to the container, stopping as soon
 * as either iterator reports a size of zero.
 *
 * @param container where to write
 * @param rlwi first iterator to aggregate
 * @param rlwj second iterator to aggregate
 */
 static void andToContainer(final BitmapStorage container, final IteratingRLW rlwi, IteratingRLW rlwj) {
 while ((rlwi.size()>0) && (rlwj.size()>0) ) {
 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
 final boolean i_is_prey = rlwi.getRunningLength() < rlwj .getRunningLength();
 // The "predator" is the side with the longer run; the "prey" is the other.
 final IteratingRLW prey = i_is_prey ? rlwi : rlwj;
 final IteratingRLW predator = i_is_prey ?
rlwj : rlwi;
 if (predator.getRunningBit() == false) {
 // A run of zeros: the and is zero over the whole run.
 container.addStreamOfEmptyWords(false, predator.getRunningLength());
 prey.discardFirstWords(predator.getRunningLength());
 predator.discardFirstWords(predator.getRunningLength());
 } else {
 // A run of ones: the prey is copied through for the length of the run,
 // then padded with zeros if it ran out first.
 final long index = discharge(container, prey, predator.getRunningLength());
 container.addStreamOfEmptyWords(false, predator.getRunningLength() - index);
 predator.discardFirstWords(predator.getRunningLength());
 }
 }
 // Both sides now start with literal words: and them pairwise.
 final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords());
 if (nbre_literal > 0) {
 for (int k = 0; k < nbre_literal; ++k)
 container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
 rlwi.discardFirstWords(nbre_literal);
 rlwj.discardFirstWords(nbre_literal);
 }
 }
 }
 /**
 * Compute the first few words of the XOR aggregate between two iterators.
 * The loop also stops as soon as either iterator reports a size of zero.
 *
 * @param container where to write
 * @param desiredrlwcount number of words to be written (max)
 * @param rlwi first iterator to aggregate
 * @param rlwj second iterator to aggregate
 */
 public static void xorToContainer(final BitmapStorage container, int desiredrlwcount, final IteratingRLW rlwi, final IteratingRLW rlwj) {
 while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) {
 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
 final boolean i_is_prey = rlwi.getRunningLength() < rlwj .getRunningLength();
 // The "predator" is the side with the longer run; the "prey" is the other.
 final IteratingRLW prey = i_is_prey ? rlwi : rlwj;
 final IteratingRLW predator = i_is_prey ?
rlwj : rlwi; if (predator.getRunningBit() == false) { long index = discharge(container, prey, predator.getRunningLength()); container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } else { long index = dischargeNegated(container, prey, predator.getRunningLength()); container.addStreamOfEmptyWords(true, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } } final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { desiredrlwcount -= nbre_literal; for (int k = 0; k < nbre_literal; ++k) container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); rlwi.discardFirstWords(nbre_literal); rlwj.discardFirstWords(nbre_literal); } } } protected static int inplaceor(long[] bitmap, IteratingRLW i) { int pos = 0; long s; while ((s = i.size()) > 0) { if (pos + s < bitmap.length) { final int L = (int) i.getRunningLength(); if (i.getRunningBit()) java.util.Arrays.fill(bitmap, pos, pos + L, ~0l); pos += L; final int LR = i.getNumberOfLiteralWords(); for (int k = 0; k < LR; ++k) bitmap[pos++] |= i.getLiteralWordAt(k); if (!i.next()) { return pos; } } else { int howmany = bitmap.length - pos; int L = (int) i.getRunningLength(); if (pos + L > bitmap.length) { if (i.getRunningBit()) { java.util.Arrays.fill(bitmap, pos, bitmap.length, ~0l); } i.discardFirstWords(howmany); return bitmap.length; } if (i.getRunningBit()) java.util.Arrays.fill(bitmap, pos, pos + L, ~0l); pos += L; for (int k = 0; pos < bitmap.length; ++k) bitmap[pos++] |= i.getLiteralWordAt(k); i.discardFirstWords(howmany); return pos; } } return pos; } protected static int inplacexor(long[] bitmap, IteratingRLW i) { int pos = 0; long s; while ((s = i.size()) > 0) { if (pos + s < bitmap.length) { final int L = (int) i.getRunningLength(); if (i.getRunningBit()) { for(int k = pos ; k < pos + L; ++k) bitmap[k] = ~bitmap[k]; } pos += L; 
final int LR = i.getNumberOfLiteralWords(); for (int k = 0; k < LR; ++k) bitmap[pos++] ^= i.getLiteralWordAt(k); if (!i.next()) { return pos; } } else { int howmany = bitmap.length - pos; int L = (int) i.getRunningLength(); if (pos + L > bitmap.length) { if (i.getRunningBit()) { for(int k = pos ; k < bitmap.length; ++k) bitmap[k] = ~bitmap[k]; } i.discardFirstWords(howmany); return bitmap.length; } if (i.getRunningBit()) for(int k = pos ; k < pos + L; ++k) bitmap[k] = ~bitmap[k]; pos += L; for (int k = 0; pos < bitmap.length; ++k) bitmap[pos++] ^= i.getLiteralWordAt(k); i.discardFirstWords(howmany); return pos; } } return pos; } protected static int inplaceand(long[] bitmap, IteratingRLW i) { int pos = 0; long s; while ((s = i.size()) > 0) { if (pos + s < bitmap.length) { final int L = (int) i.getRunningLength(); if (!i.getRunningBit()) { for(int k = pos ; k < pos + L; ++k) bitmap[k] = 0; } pos += L; final int LR = i.getNumberOfLiteralWords(); for (int k = 0; k < LR; ++k) bitmap[pos++] &= i.getLiteralWordAt(k); if (!i.next()) { return pos; } } else { int howmany = bitmap.length - pos; int L = (int) i.getRunningLength(); if (pos + L > bitmap.length) { if (!i.getRunningBit()) { for(int k = pos ; k < bitmap.length; ++k) bitmap[k] = 0; } i.discardFirstWords(howmany); return bitmap.length; } if (!i.getRunningBit()) for(int k = pos ; k < pos + L; ++k) bitmap[k] = 0; pos += L; for (int k = 0; pos < bitmap.length; ++k) bitmap[pos++] &= i.getLiteralWordAt(k); i.discardFirstWords(howmany); return pos; } } return pos; } /** * An optimization option. Larger values may improve speed, but at * the expense of memory. 
*/ public final static int DEFAULTMAXBUFSIZE = 65536; } class BufferedORIterator implements CloneableIterator { EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); long[] hardbitmap; LinkedList ll; int buffersize; BufferedORIterator(LinkedList basell, int bufsize) { this.ll = basell; this.hardbitmap = new long[bufsize]; } @Override public BufferedXORIterator clone() throws CloneNotSupportedException { BufferedXORIterator answer = (BufferedXORIterator) super.clone(); answer.buffer = this.buffer.clone(); answer.hardbitmap = this.hardbitmap.clone(); answer.ll = (LinkedList) this.ll.clone(); return answer; } @Override public boolean hasNext() { return !this.ll.isEmpty(); } @Override public EWAHIterator next() { this.buffer.clear(); long effective = 0; Iterator i = this.ll.iterator(); while (i.hasNext()) { IteratingRLW rlw = i.next(); if (rlw.size() > 0) { int eff = IteratorAggregation.inplaceor(this.hardbitmap, rlw); if (eff > effective) effective = eff; } else i.remove(); } for (int k = 0; k < effective; ++k) { this.buffer.add(this.hardbitmap[k]); } Arrays.fill(this.hardbitmap, 0); return this.buffer.getEWAHIterator(); } } class BufferedXORIterator implements CloneableIterator { EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); long[] hardbitmap; LinkedList ll; int buffersize; BufferedXORIterator(LinkedList basell, int bufsize) { this.ll = basell; this.hardbitmap = new long[bufsize]; } @Override public BufferedXORIterator clone() throws CloneNotSupportedException { BufferedXORIterator answer = (BufferedXORIterator) super.clone(); answer.buffer = this.buffer.clone(); answer.hardbitmap = this.hardbitmap.clone(); answer.ll = (LinkedList) this.ll.clone(); return answer; } @Override public boolean hasNext() { return !this.ll.isEmpty(); } @Override public EWAHIterator next() { this.buffer.clear(); long effective = 0; Iterator i = this.ll.iterator(); while (i.hasNext()) { IteratingRLW rlw = i.next(); if (rlw.size() > 0) { int eff = 
IteratorAggregation.inplacexor(this.hardbitmap, rlw); if (eff > effective) effective = eff; } else i.remove(); } for (int k = 0; k < effective; ++k) this.buffer.add(this.hardbitmap[k]); Arrays.fill(this.hardbitmap, 0); return this.buffer.getEWAHIterator(); } } class BufferedAndIterator implements CloneableIterator { EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); LinkedList ll; int buffersize; public BufferedAndIterator(LinkedList basell, int bufsize) { this.ll = basell; this.buffersize = bufsize; } @Override public boolean hasNext() { return !this.ll.isEmpty(); } @Override public BufferedAndIterator clone() throws CloneNotSupportedException { BufferedAndIterator answer = (BufferedAndIterator) super.clone(); answer.buffer = this.buffer.clone(); answer.ll = (LinkedList) this.ll.clone(); return answer; } @Override public EWAHIterator next() { this.buffer.clear(); IteratorAggregation.andToContainer(this.buffer, this.buffersize * this.ll.size(), this.ll.get(0), this.ll.get(1)); if (this.ll.size() > 2) { Iterator i = this.ll.iterator(); i.next(); i.next(); EWAHCompressedBitmap tmpbuffer = new EWAHCompressedBitmap(); while (i.hasNext() && this.buffer.sizeInBytes() > 0) { IteratorAggregation.andToContainer(tmpbuffer, this.buffer.getIteratingRLW(), i.next()); this.buffer.swap(tmpbuffer); tmpbuffer.clear(); } } Iterator i = this.ll.iterator(); while(i.hasNext()) { if(i.next().size() == 0) { this.ll.clear(); break; } } return this.buffer.getEWAHIterator(); } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IteratorUtil.java000066400000000000000000000066061224043567000272550ustar00rootroot00000000000000package com.googlecode.javaewah; import java.util.Iterator; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. 
*/ /** * Convenience functions for working over iterators * */ public class IteratorUtil { /** * @param i iterator we wish to iterate over * @return an iterator over the set bits corresponding to the iterator */ public static IntIterator toSetBitsIntIterator(final IteratingRLW i) { return new IntIteratorOverIteratingRLW(i); } /** * @param i iterator we wish to iterate over * @return an iterator over the set bits corresponding to the iterator */ public static Iterator toSetBitsIterator(final IteratingRLW i) { return new Iterator() { @Override public boolean hasNext() { return this.under.hasNext(); } @Override public Integer next() { return new Integer(this.under.next()); } @Override public void remove() { } final private IntIterator under = toSetBitsIntIterator(i); }; } /** * Generate a bitmap from an iterator * * @param i iterator we wish to materialize * @param c where we write */ public static void materialize(final IteratingRLW i, final BitmapStorage c) { while (true) { if (i.getRunningLength() > 0) { c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); } for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) c.add(i.getLiteralWordAt(k)); if (!i.next()) break; } } /** * @param i iterator we wish to iterate over * @return the cardinality (number of set bits) corresponding to the iterator */ public static int cardinality(final IteratingRLW i) { int answer = 0; while (true) { if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap.wordinbits; for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) answer += Long.bitCount(i.getLiteralWordAt(k)); if(!i.next()) break; } return answer; } /** * @param x set of bitmaps * @return an array of iterators corresponding to the array of bitmaps */ public static IteratingRLW[] toIterators(final EWAHCompressedBitmap... 
x) { IteratingRLW[] X = new IteratingRLW[x.length]; for (int k = 0; k < X.length; ++k) { X[k] = new IteratingBufferedRunningLengthWord(x[k]); } return X; } /** * Turn an iterator into a bitmap. * * @param i iterator we wish to materialize * @param c where we write * @param Max maximum number of words we wish to materialize * @return how many words were actually materialized */ public static long materialize(final IteratingRLW i, final BitmapStorage c, long Max) { final long origMax = Max; while (true) { if (i.getRunningLength() > 0) { long L = i.getRunningLength(); if(L > Max) L = Max; c.addStreamOfEmptyWords(i.getRunningBit(), L); Max -= L; } long L = i.getNumberOfLiteralWords(); for (int k = 0; k < L; ++k) c.add(i.getLiteralWordAt(k)); if(Max>0) { if (!i.next()) break; } else break; } return origMax - Max; } /** * Turn an iterator into a bitmap * * @param i iterator we wish to materialize * @return materialized version of the iterator */ public static EWAHCompressedBitmap materialize(final IteratingRLW i) { EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); materialize(i, ewah); return ewah; } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/LogicalElement.java000066400000000000000000000024341224043567000275050ustar00rootroot00000000000000package com.googlecode.javaewah; /** * A prototypical model for bitmaps. Used by the * class FastAggregation. Users should probably not * be concerned by this class. 
* * @author Daniel Lemire * @param the type of element (e.g., a bitmap class) * */ public interface LogicalElement { /** * Compute the bitwise logical and * @param le element * @return the result of the operation */ public T and(T le); /** * Compute the bitwise logical and not * @param le element * @return the result of the operation */ public T andNot(T le); /** * Compute the bitwise logical not (in place) */ public void not(); @SuppressWarnings({ "rawtypes", "javadoc" }) /** * Compute the bitwise logical or * @param le another element * @return the result of the operation */ public LogicalElement or(T le); /** * How many logical bits does this element represent? * * @return the number of bits represented by this element */ public int sizeInBits(); /** * Should report the storage requirement * @return How many bytes * @since 0.6.2 */ public int sizeInBytes(); /** * Compute the bitwise logical Xor * @param le element * @return the results of the operation */ public T xor(T le); } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/NonEmptyVirtualStorage.java000066400000000000000000000044551224043567000312730ustar00rootroot00000000000000package com.googlecode.javaewah; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * This is a BitmapStorage that can be used to determine quickly if the result * of an operation is non-trivial... that is, whether there will be at least on * set bit. * * @since 0.4.2 * @author Daniel Lemire and Veronika Zenz * */ public class NonEmptyVirtualStorage implements BitmapStorage { static class NonEmptyException extends RuntimeException { private static final long serialVersionUID = 1L; /** * Do not fill in the stack trace for this exception * for performance reasons. 
* * @return this instance * @see java.lang.Throwable#fillInStackTrace() */ @Override public synchronized Throwable fillInStackTrace() { return this; } } private static final NonEmptyException nonEmptyException = new NonEmptyException(); /** * If the word to be added is non-zero, a NonEmptyException exception is * thrown. * * @see com.googlecode.javaewah.BitmapStorage#add(long) */ @Override public void add(long newdata) { if (newdata != 0) throw nonEmptyException; return; } /** * throws a NonEmptyException exception when number is greater than 0 * */ @Override public void addStreamOfLiteralWords(long[] data, int start, int number) { if(number>0){ throw nonEmptyException; } } /** * If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception, * otherwise, nothing happens. * * @see com.googlecode.javaewah.BitmapStorage#addStreamOfEmptyWords(boolean, long) */ @Override public void addStreamOfEmptyWords(boolean v, long number) { if (v && (number>0)) throw nonEmptyException; return; } /** * throws a NonEmptyException exception when number is greater than 0 * */ @Override public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { if(number>0){ throw nonEmptyException; } } /** * Does nothing. * * @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int) */ @Override public void setSizeInBits(int bits) { } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/RunningLengthWord.java000066400000000000000000000114461224043567000302420ustar00rootroot00000000000000package com.googlecode.javaewah; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. * * @since 0.1.0 * @author Daniel Lemire */ public final class RunningLengthWord implements Cloneable { /** * Instantiates a new running length word. 
* * @param a * an array of 64-bit words * @param p * position in the array where the running length word is * located. */ RunningLengthWord(final EWAHCompressedBitmap a, final int p) { this.parent = a; this.position = p; } /** * Gets the number of literal words. * * @return the number of literal words */ public int getNumberOfLiteralWords() { return (int) (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); } /** * Gets the running bit. * * @return the running bit */ public boolean getRunningBit() { return (this.parent.buffer[this.position] & 1) != 0; } /** * Gets the running length. * * @return the running length */ public long getRunningLength() { return (this.parent.buffer[this.position] >>> 1) & largestrunninglengthcount; } /** * Sets the number of literal words. * * @param number * the new number of literal words */ public void setNumberOfLiteralWords(final long number) { this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) | runninglengthplusrunningbit; } /** * Sets the running bit. * * @param b * the new running bit */ public void setRunningBit(final boolean b) { if (b) this.parent.buffer[this.position] |= 1l; else this.parent.buffer[this.position] &= ~1l; } /** * Sets the running length. * * @param number * the new running length */ public void setRunningLength(final long number) { this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; this.parent.buffer[this.position] &= (number << 1) | notshiftedlargestrunninglengthcount; } /** * Return the size in uncompressed words represented by this running * length word. * * @return the size */ public long size() { return getRunningLength() + getNumberOfLiteralWords(); } /* * @see java.lang.Object#toString() */ @Override public String toString() { return "running bit = " + getRunningBit() + " running length = " + getRunningLength() + " number of lit. 
words " + getNumberOfLiteralWords(); } @Override public RunningLengthWord clone() throws CloneNotSupportedException { RunningLengthWord answer; answer = (RunningLengthWord) super.clone(); answer.parent = this.parent; answer.position = this.position; return answer; } /** The array of words. */ public EWAHCompressedBitmap parent; /** The position in array. */ public int position; /** * number of bits dedicated to marking of the running length of clean * words */ public static final int runninglengthbits = 32; private static final int literalbits = 64 - 1 - runninglengthbits; /** largest number of literal words in a run. */ public static final int largestliteralcount = (1 << literalbits) - 1; /** largest number of clean words in a run */ public static final long largestrunninglengthcount = (1l << runninglengthbits) - 1; private static final long runninglengthplusrunningbit = (1l << (runninglengthbits + 1)) - 1; private static final long shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; private static final long notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; private static final long notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; }javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/000077500000000000000000000000001224043567000257055ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark.java000066400000000000000000000215631224043567000304510ustar00rootroot00000000000000package com.googlecode.javaewah.benchmark; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. 
*/ import java.text.DecimalFormat; import java.util.Arrays; import java.util.List; import com.googlecode.javaewah.EWAHCompressedBitmap; import com.googlecode.javaewah.FastAggregation; import com.googlecode.javaewah.IntIterator; import com.googlecode.javaewah.IteratingRLW; import com.googlecode.javaewah.IteratorAggregation; import com.googlecode.javaewah.IteratorUtil; /** * This class is used to benchmark the performance EWAH. * * @author Daniel Lemire */ public class Benchmark { /** * Compute the union between two sorted arrays * @param set1 first sorted array * @param set2 second sorted array * @return merged array */ static public int[] unite2by2(final int[] set1, final int[] set2) { int pos = 0; int k1 = 0, k2 = 0; if (0 == set1.length) return Arrays.copyOf(set2, set2.length); if (0 == set2.length) return Arrays.copyOf(set1, set1.length); int[] buffer = new int[set1.length + set2.length]; while (true) { if (set1[k1] < set2[k2]) { buffer[pos++] = set1[k1]; ++k1; if (k1 >= set1.length) { for (; k2 < set2.length; ++k2) buffer[pos++] = set2[k2]; break; } } else if (set1[k1] == set2[k2]) { buffer[pos++] = set1[k1]; ++k1; ++k2; if (k1 >= set1.length) { for (; k2 < set2.length; ++k2) buffer[pos++] = set2[k2]; break; } if (k2 >= set2.length) { for (; k1 < set1.length; ++k1) buffer[pos++] = set1[k1]; break; } } else {// if (set1[k1]>set2[k2]) { buffer[pos++] = set2[k2]; ++k2; if (k2 >= set2.length) { for (; k1 < set1.length; ++k1) buffer[pos++] = set1[k1]; break; } } } return Arrays.copyOf(buffer, pos); } @SuppressWarnings("javadoc") public static void main(String args[]) { //test(2, 24, 1); test(100, 16, 1); } @SuppressWarnings("javadoc") public static void test(int N, int nbr, int repeat) { DecimalFormat df = new DecimalFormat("0.###"); ClusteredDataGenerator cdg = new ClusteredDataGenerator(); for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { long bogus = 0; String line = ""; long bef, aft; line += sparsity; int[][] data = new int[N][]; int Max = (1 << (nbr 
+ sparsity)); System.out.println("# generating random data..."); int[] inter = cdg.generateClustered(1 << (nbr/2), Max); for (int k = 0; k < N; ++k) data[k] = unite2by2(cdg.generateClustered(1 << nbr, Max),inter); System.out.println("# generating random data... ok."); // building bef = System.currentTimeMillis(); EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; int size = 0; for (int r = 0; r < repeat; ++r) { size = 0; for (int k = 0; k < N; ++k) { ewah[k] = new EWAHCompressedBitmap(); for (int x = 0; x < data[k].length; ++x) { ewah[k].set(data[k][x]); } size += ewah[k].sizeInBytes(); } } aft = System.currentTimeMillis(); line += "\t" + size; line += "\t" + df.format((aft - bef) / 1000.0); // uncompressing bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { int[] array = ewah[k].toArray(); bogus += array.length; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // uncompressing bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { int[] array = new int[ewah[k].cardinality()]; int c = 0; for (int x : ewah[k]) array[c++] = x; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // uncompressing bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { List L = ewah[k].getPositions(); int[] array = new int[L.size()]; int c = 0; for (int x : L) array[c++] = x; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // uncompressing bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IntIterator iter = ewah[k].intIterator(); while (iter.hasNext()) { bogus += iter.next(); } } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); line += "\t\t\t"; // logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { 
EWAHCompressedBitmap ewahor = ewah[0]; for (int j = 1; j < k + 1; ++j) { ewahor = ewahor.or(ewah[j]); } bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap ewahor = EWAHCompressedBitmap .or(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap ewahor = FastAggregation.or(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or // run sanity check for (int k = 0; k < N; ++k) { IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j].getIteratingRLW(); } IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); EWAHCompressedBitmap ewahorp = EWAHCompressedBitmap.or(Arrays.copyOf(ewah, k+1)); EWAHCompressedBitmap mewahor = IteratorUtil.materialize(ewahor); if(!ewahorp.equals(mewahor)) throw new RuntimeException("bug"); } bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j].getIteratingRLW(); } IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); bogus += IteratorUtil.materialize(ewahor).sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); line += "\t\t\t"; // logical and bef = 
System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap ewahand = ewah[0]; for (int j = 1; j < k + 1; ++j) { ewahand = ewahand.and(ewah[j]); } bogus += ewahand.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical and bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap ewahand = EWAHCompressedBitmap .and(ewahcp); bogus += ewahand.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); for (int k = 0; k < N; ++k) { IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j].getIteratingRLW(); } IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); EWAHCompressedBitmap ewahandp = EWAHCompressedBitmap.and(Arrays.copyOf(ewah, k+1)); EWAHCompressedBitmap mewahand = IteratorUtil.materialize(ewahand); if(!ewahandp.equals(mewahand)) throw new RuntimeException("bug"); } // fast logical and bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j].getIteratingRLW(); } IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); bogus += IteratorUtil.materialize(ewahand).sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); System.out .println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); System.out.println(line); System.out.println("# bogus =" + bogus); } } } 
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark32.java000066400000000000000000000154461224043567000306210ustar00rootroot00000000000000package com.googlecode.javaewah.benchmark; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ import java.text.DecimalFormat; import java.util.List; import com.googlecode.javaewah32.EWAHCompressedBitmap32; import com.googlecode.javaewah.FastAggregation; import com.googlecode.javaewah.IntIterator; import com.googlecode.javaewah32.IteratingRLW32; import com.googlecode.javaewah32.IteratorAggregation32; import com.googlecode.javaewah32.IteratorUtil32; /** * This class is used to benchmark the performance EWAH. * * @author Daniel Lemire */ public class Benchmark32 { @SuppressWarnings("javadoc") public static void main(String args[]) { test(100, 16, 1); // test(2, 24, 1); } @SuppressWarnings("javadoc") public static void test(int N, int nbr, int repeat) { DecimalFormat df = new DecimalFormat("0.###"); ClusteredDataGenerator cdg = new ClusteredDataGenerator(); for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { long bogus = 0; String line = ""; long bef, aft; line += sparsity; int[][] data = new int[N][]; int Max = (1 << (nbr + sparsity)); System.out.println("# generating random data..."); int[] inter = cdg.generateClustered(1 << (nbr/2), Max); for (int k = 0; k < N; ++k) data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); System.out.println("# generating random data... 
ok."); // building bef = System.currentTimeMillis(); EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; int size = 0; for (int r = 0; r < repeat; ++r) { size = 0; for (int k = 0; k < N; ++k) { ewah[k] = new EWAHCompressedBitmap32(); for (int x = 0; x < data[k].length; ++x) { ewah[k].set(data[k][x]); } size += ewah[k].sizeInBytes(); } } aft = System.currentTimeMillis(); line += "\t" + size; line += "\t" + df.format((aft - bef) / 1000.0); // uncompressing bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { int[] array = ewah[k].toArray(); bogus += array.length; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // uncompressing bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { int[] array = new int[ewah[k].cardinality()]; int c = 0; for (int x : ewah[k]) array[c++] = x; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // uncompressing bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { List L = ewah[k].getPositions(); int[] array = new int[L.size()]; int c = 0; for (int x : L) array[c++] = x; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // uncompressing bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IntIterator iter = ewah[k].intIterator(); while (iter.hasNext()) { bogus += iter.next(); } } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); line += "\t\t\t"; // logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32 ewahor = ewah[0]; for (int j = 1; j < k + 1; ++j) { ewahor = ewahor.or(ewah[j]); } bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); 
for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 .or(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap32 ewahor = FastAggregation.or(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j].getIteratingRLW(); } IteratingRLW32 ewahor = IteratorAggregation32.bufferedor(ewahcp); bogus += IteratorUtil32.materialize(ewahor).sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); line += "\t\t\t"; // logical and bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32 ewahand = ewah[0]; for (int j = 1; j < k + 1; ++j) { ewahand = ewahand.and(ewah[j]); } bogus += ewahand.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical and bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32 .and(ewahcp); bogus += ewahand.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + 
df.format((aft - bef) / 1000.0); // fast logical and bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j].getIteratingRLW(); } IteratingRLW32 ewahand = IteratorAggregation32.bufferedand(ewahcp); bogus += IteratorUtil32.materialize(ewahand).sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); System.out .println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); System.out.println(line); System.out.println("# bogus =" + bogus); } } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection.java000066400000000000000000000077441224043567000330450ustar00rootroot00000000000000package com.googlecode.javaewah.benchmark; import java.text.DecimalFormat; import com.googlecode.javaewah.*; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * To benchmark the logical and (intersection) aggregate. 
*/ public class BenchmarkIntersection { @SuppressWarnings("javadoc") public static void main(String args[]) { test(10, 18, 1); } @SuppressWarnings({ "javadoc"}) public static void test(int N, int nbr, int repeat) { long bogus = 0; DecimalFormat df = new DecimalFormat("0.###"); ClusteredDataGenerator cdg = new ClusteredDataGenerator(); for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { for (int times = 0; times < 2; ++times) { String line = ""; long bef, aft; line += sparsity; int[][] data = new int[N][]; int Max = (1 << (nbr + sparsity)); int[] inter = cdg.generateClustered(1 << (nbr/2), Max); for (int k = 0; k < N; ++k) data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); // building EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; for (int k = 0; k < N; ++k) { ewah[k] = new EWAHCompressedBitmap(); for (int x = 0; x < data[k].length; ++x) { ewah[k].set(data[k][x]); } data[k] = null; } // sanity check if (true) { EWAHCompressedBitmap answer = ewah[0].and(ewah[1]); for (int k = 2; k < ewah.length; ++k) answer = answer.and(ewah[k]); EWAHCompressedBitmap ewahand = EWAHCompressedBitmap.and(ewah); if (!answer.equals(ewahand)) throw new RuntimeException( "bug EWAHCompressedBitmap.and"); EWAHCompressedBitmap ewahand2 = FastAggregation .bufferedand(65536,ewah); if (!ewahand.equals(ewahand2)) throw new RuntimeException( "bug FastAggregation.bufferedand "); } // logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap ewahor = ewah[0]; for (int j = 1; j < k + 1; ++j) { ewahor = ewahor.and(ewah[j]); } } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap ewahor = EWAHCompressedBitmap 
.and(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap ewahor = FastAggregation .bufferedand(65536,ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = new IteratingBufferedRunningLengthWord( ewah[j]); } IteratingRLW ewahor = IteratorAggregation.bufferedand(ewahcp); int wordcounter = IteratorUtil.cardinality(ewahor); bogus += wordcounter; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); System.out .println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); System.out.println(line); } System.out.println("# bogus =" + bogus); } } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection32.java000066400000000000000000000100271224043567000331760ustar00rootroot00000000000000package com.googlecode.javaewah.benchmark; import java.text.DecimalFormat; import com.googlecode.javaewah32.*; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * To benchmark the logical and (intersection) aggregate. 
*/ public class BenchmarkIntersection32 { @SuppressWarnings("javadoc") public static void main(String args[]) { test(10, 18, 1); } @SuppressWarnings({ "javadoc" }) public static void test(int N, int nbr, int repeat) { long bogus = 0; DecimalFormat df = new DecimalFormat("0.###"); ClusteredDataGenerator cdg = new ClusteredDataGenerator(); for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { for (int times = 0; times < 2; ++times) { String line = ""; long bef, aft; line += sparsity; int[][] data = new int[N][]; int Max = (1 << (nbr + sparsity)); int[] inter = cdg.generateClustered(1 << (nbr/2), Max); for (int k = 0; k < N; ++k) data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); // building EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; for (int k = 0; k < N; ++k) { ewah[k] = new EWAHCompressedBitmap32(); for (int x = 0; x < data[k].length; ++x) { ewah[k].set(data[k][x]); } data[k] = null; } // sanity check if (true) { EWAHCompressedBitmap32 answer = ewah[0].and(ewah[1]); for (int k = 2; k < ewah.length; ++k) answer = answer.and(ewah[k]); EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32.and(ewah); if (!answer.equals(ewahand)) throw new RuntimeException( "bug EWAHCompressedBitmap.and"); EWAHCompressedBitmap32 ewahand2 = FastAggregation32 .bufferedand(65536,ewah); if (!ewahand.equals(ewahand2)) throw new RuntimeException( "bug FastAggregation.bufferedand "); } // logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32 ewahor = ewah[0]; for (int j = 1; j < k + 1; ++j) { ewahor = ewahor.and(ewah[j]); } } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap32 
ewahor = EWAHCompressedBitmap32 .and(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap32 ewahor = FastAggregation32 .bufferedand(65536,ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = new IteratingBufferedRunningLengthWord32( ewah[j]); } IteratingRLW32 ewahor = IteratorAggregation32.bufferedand(ewahcp); int wordcounter = IteratorUtil32.cardinality(ewahor); bogus += wordcounter; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); System.out .println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); System.out.println(line); } System.out.println("# bogus =" + bogus); } } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion.java000066400000000000000000000120771224043567000314620ustar00rootroot00000000000000package com.googlecode.javaewah.benchmark; import java.text.DecimalFormat; import com.googlecode.javaewah.*; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * To benchmark the logical or (union) aggregate. 
*/ public class BenchmarkUnion { @SuppressWarnings("javadoc") public static void main(String args[]) { test(10, 18, 1); } @SuppressWarnings({ "javadoc", "deprecation" }) public static void test(int N, int nbr, int repeat) { long bogus = 0; DecimalFormat df = new DecimalFormat("0.###"); ClusteredDataGenerator cdg = new ClusteredDataGenerator(); for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { for (int times = 0; times < 2; ++times) { String line = ""; long bef, aft; line += sparsity; int[][] data = new int[N][]; int Max = (1 << (nbr + sparsity)); for (int k = 0; k < N; ++k) data[k] = cdg.generateClustered(1 << nbr, Max); // building EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; for (int k = 0; k < N; ++k) { ewah[k] = new EWAHCompressedBitmap(); for (int x = 0; x < data[k].length; ++x) { ewah[k].set(data[k][x]); } data[k] = null; } // sanity check if (true) { EWAHCompressedBitmap answer = ewah[0].or(ewah[1]); for (int k = 2; k < ewah.length; ++k) answer = answer.or(ewah[k]); EWAHCompressedBitmap ewahor = EWAHCompressedBitmap.or(ewah); if (!answer.equals(ewahor)) throw new RuntimeException( "bug EWAHCompressedBitmap.or"); EWAHCompressedBitmap ewahor3 = FastAggregation.or(ewah); if (!ewahor.equals(ewahor3)) throw new RuntimeException("bug FastAggregation.or"); EWAHCompressedBitmap ewahor2 = FastAggregation .bufferedor(65536,ewah); if (!ewahor.equals(ewahor2)) throw new RuntimeException( "bug FastAggregation.bufferedor "); } // logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap ewahor = ewah[0]; for (int j = 1; j < k + 1; ++j) { ewahor = ewahor.or(ewah[j]); } } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = 
ewah[j]; } EWAHCompressedBitmap ewahor = EWAHCompressedBitmap .or(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap ewahor = FastAggregation .or(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap ewahor = FastAggregation .bufferedor(65536,ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap x = new EWAHCompressedBitmap(); FastAggregation.legacy_orWithContainer(x, ewahcp); bogus += x.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = new IteratingBufferedRunningLengthWord( ewah[j]); } IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); int wordcounter = IteratorUtil.cardinality(ewahor); bogus += wordcounter; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); System.out .println("# times for: 2by2 
EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); System.out.println(line); } System.out.println("# bogus =" + bogus); } } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion32.java000066400000000000000000000123211224043567000316170ustar00rootroot00000000000000package com.googlecode.javaewah.benchmark; import java.text.DecimalFormat; import com.googlecode.javaewah.FastAggregation; import com.googlecode.javaewah32.*; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * To benchmark the logical or (union) aggregate. */ public class BenchmarkUnion32 { @SuppressWarnings("javadoc") public static void main(String args[]) { test(10, 18, 1); } @SuppressWarnings({ "javadoc", "deprecation" }) public static void test(int N, int nbr, int repeat) { long bogus = 0; DecimalFormat df = new DecimalFormat("0.###"); ClusteredDataGenerator cdg = new ClusteredDataGenerator(); for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { for (int times = 0; times < 2; ++times) { String line = ""; long bef, aft; line += sparsity; int[][] data = new int[N][]; int Max = (1 << (nbr + sparsity)); for (int k = 0; k < N; ++k) data[k] = cdg.generateClustered(1 << nbr, Max); // building EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; for (int k = 0; k < N; ++k) { ewah[k] = new EWAHCompressedBitmap32(); for (int x = 0; x < data[k].length; ++x) { ewah[k].set(data[k][x]); } data[k] = null; } // sanity check if(true){ EWAHCompressedBitmap32 answer = ewah[0].or(ewah[1]); for(int k = 2; k < ewah.length; ++k) answer = answer.or(ewah[k]); EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 .or(ewah); if(!answer.equals(ewahor)) throw new RuntimeException("bug EWAHCompressedBitmap.or"); EWAHCompressedBitmap32 ewahor3 = FastAggregation .or(ewah); 
if(!ewahor.equals(ewahor3)) throw new RuntimeException("bug FastAggregation.or"); EWAHCompressedBitmap32 ewahor2 = FastAggregation32 .bufferedor(65536,ewah); if(!ewahor.equals(ewahor2)) throw new RuntimeException("bug FastAggregation.bufferedor "); } // logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32 ewahor = ewah[0]; for (int j = 1; j < k + 1; ++j) { ewahor = ewahor.or(ewah[j]); } } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 .or(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap32 ewahor = FastAggregation .or(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap32 ewahor = FastAggregation32 .bufferedor(65536,ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32[] ewahcp = new 
EWAHCompressedBitmap32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); FastAggregation32.legacy_orWithContainer(x, ewahcp); bogus += x.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical or bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = new IteratingBufferedRunningLengthWord32(ewah[j]); } IteratingRLW32 ewahor = IteratorAggregation32 .bufferedor(ewahcp); int wordcounter = IteratorUtil32.cardinality(ewahor); bogus += wordcounter; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); System.out .println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); System.out.println(line); } System.out.println("# bogus =" + bogus); } } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR.java000066400000000000000000000101031224043567000310260ustar00rootroot00000000000000package com.googlecode.javaewah.benchmark; import java.text.DecimalFormat; import com.googlecode.javaewah.*; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * To benchmark the logical xor aggregate. 
*/ public class BenchmarkXOR { @SuppressWarnings("javadoc") public static void main(String args[]) { //test(10, 18, 1); test(2, 22, 1); } @SuppressWarnings({ "javadoc" }) public static void test(int N, int nbr, int repeat) { long bogus = 0; DecimalFormat df = new DecimalFormat("0.###"); ClusteredDataGenerator cdg = new ClusteredDataGenerator(); for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { for (int times = 0; times < 2; ++times) { String line = ""; long bef, aft; line += sparsity; int[][] data = new int[N][]; int Max = (1 << (nbr + sparsity)); for (int k = 0; k < N; ++k) data[k] = cdg.generateClustered(1 << nbr, Max); // building EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; for (int k = 0; k < N; ++k) { ewah[k] = new EWAHCompressedBitmap(); for (int x = 0; x < data[k].length; ++x) { ewah[k].set(data[k][x]); } data[k] = null; } // sanity check if (true) { EWAHCompressedBitmap answer = ewah[0].xor(ewah[1]); for (int k = 2; k < ewah.length; ++k) answer = answer.xor(ewah[k]); EWAHCompressedBitmap ewahor3 = FastAggregation.xor(ewah); if (!answer.equals(ewahor3)) throw new RuntimeException("bug FastAggregation.xor"); EWAHCompressedBitmap ewahor2 = FastAggregation .bufferedxor(65536,ewah); if (!answer.equals(ewahor2)) throw new RuntimeException( "bug FastAggregation.bufferedxor "); EWAHCompressedBitmap iwah = IteratorUtil.materialize(IteratorAggregation.bufferedxor(IteratorUtil.toIterators(ewah))); if (!answer.equals(iwah)) throw new RuntimeException( "bug xor it "); } // logical xor bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap ewahor = ewah[0]; for (int j = 1; j < k + 1; ++j) { ewahor = ewahor.xor(ewah[j]); } } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical xor bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; for 
(int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap ewahor = FastAggregation .xor(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical xor bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap ewahor = FastAggregation .bufferedxor(65536,ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical xor bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = new IteratingBufferedRunningLengthWord( ewah[j]); } IteratingRLW ewahor = IteratorAggregation.bufferedxor(ewahcp); int wordcounter = IteratorUtil.cardinality(ewahor); bogus += wordcounter; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); System.out .println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based"); System.out.println(line); } System.out.println("# bogus =" + bogus); } } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR32.java000066400000000000000000000102521224043567000312000ustar00rootroot00000000000000package com.googlecode.javaewah.benchmark; import java.text.DecimalFormat; import com.googlecode.javaewah.FastAggregation; import com.googlecode.javaewah32.*; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * To benchmark the logical xor aggregate. 
*/ public class BenchmarkXOR32 { @SuppressWarnings("javadoc") public static void main(String args[]) { test(10, 18, 1); //test(2, 22, 1); } @SuppressWarnings({ "javadoc" }) public static void test(int N, int nbr, int repeat) { long bogus = 0; DecimalFormat df = new DecimalFormat("0.###"); ClusteredDataGenerator cdg = new ClusteredDataGenerator(); for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { for (int times = 0; times < 2; ++times) { String line = ""; long bef, aft; line += sparsity; int[][] data = new int[N][]; int Max = (1 << (nbr + sparsity)); for (int k = 0; k < N; ++k) data[k] = cdg.generateClustered(1 << nbr, Max); // building EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; for (int k = 0; k < N; ++k) { ewah[k] = new EWAHCompressedBitmap32(); for (int x = 0; x < data[k].length; ++x) { ewah[k].set(data[k][x]); } data[k] = null; } // sanity check if (true) { EWAHCompressedBitmap32 answer = ewah[0].xor(ewah[1]); for (int k = 2; k < ewah.length; ++k) answer = answer.xor(ewah[k]); EWAHCompressedBitmap32 ewahor3 = FastAggregation.xor(ewah); if (!answer.equals(ewahor3)) throw new RuntimeException("bug FastAggregation.xor"); EWAHCompressedBitmap32 ewahor2 = FastAggregation32 .bufferedxor(65536,ewah); if (!answer.equals(ewahor2)) throw new RuntimeException( "bug FastAggregation.bufferedxor "); EWAHCompressedBitmap32 iwah = IteratorUtil32.materialize(IteratorAggregation32.bufferedxor(IteratorUtil32.toIterators(ewah))); if (!answer.equals(iwah)) throw new RuntimeException( "bug xor it "); } // logical xor bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32 ewahor = ewah[0]; for (int j = 1; j < k + 1; ++j) { ewahor = ewahor.xor(ewah[j]); } } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical xor bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32[] ewahcp = new 
EWAHCompressedBitmap32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap32 ewahor = FastAggregation .xor(ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical xor bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = ewah[j]; } EWAHCompressedBitmap32 ewahor = FastAggregation32 .bufferedxor(65536,ewahcp); bogus += ewahor.sizeInBits(); } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); // fast logical xor bef = System.currentTimeMillis(); for (int r = 0; r < repeat; ++r) for (int k = 0; k < N; ++k) { IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; for (int j = 0; j < k + 1; ++j) { ewahcp[j] = new IteratingBufferedRunningLengthWord32( ewah[j]); } IteratingRLW32 ewahor = IteratorAggregation32.bufferedxor(ewahcp); int wordcounter = IteratorUtil32.cardinality(ewahor); bogus += wordcounter; } aft = System.currentTimeMillis(); line += "\t" + df.format((aft - bef) / 1000.0); System.out .println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based"); System.out.println(line); } System.out.println("# bogus =" + bogus); } } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/ClusteredDataGenerator.java000066400000000000000000000043401224043567000331440ustar00rootroot00000000000000package com.googlecode.javaewah.benchmark; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * This class will generate lists of random integers with a "clustered" distribution. * Reference: * Anh VN, Moffat A. Index compression using 64-bit words. Software: Practice and Experience 2010; 40(2):131-147. 
* * @author Daniel Lemire */ public class ClusteredDataGenerator { /** * */ public ClusteredDataGenerator() { this.unidg = new UniformDataGenerator(); } /** * @param seed random seed */ public ClusteredDataGenerator(final int seed) { this.unidg = new UniformDataGenerator(seed); } /** * generates randomly N distinct integers from 0 to Max. * @param N number of integers * @param Max maximum integer value * @return a randomly generated array */ public int[] generateClustered(int N, int Max) { int[] array = new int[N]; fillClustered(array, 0, N, 0, Max); return array; } void fillClustered(int[] array, int offset, int length, int Min, int Max) { final int range = Max - Min; if ((range == length) || (length <= 10)) { fillUniform(array, offset, length, Min, Max); return; } final int cut = length / 2 + ((range - length - 1 > 0) ? this.unidg.rand.nextInt(range - length - 1) : 0); final double p = this.unidg.rand.nextDouble(); if (p < 0.25) { fillUniform(array, offset, length / 2, Min, Min + cut); fillClustered(array, offset + length / 2, length - length / 2, Min + cut, Max); } else if (p < 0.5) { fillClustered(array, offset, length / 2, Min, Min + cut); fillUniform(array, offset + length / 2, length - length / 2, Min + cut, Max); } else { fillClustered(array, offset, length / 2, Min, Min + cut); fillClustered(array, offset + length / 2, length - length / 2, Min + cut, Max); } } void fillUniform(int[] array, int offset, int length, int Min, int Max) { int[] v = this.unidg.generateUniform(length, Max - Min); for (int k = 0; k < v.length; ++k) array[k + offset] = Min + v[k]; } UniformDataGenerator unidg; } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/UniformDataGenerator.java000066400000000000000000000072471224043567000326420ustar00rootroot00000000000000package com.googlecode.javaewah.benchmark; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache 
License, Version 2.0. */ import java.util.Arrays; import java.util.BitSet; import java.util.HashSet; import java.util.Iterator; import java.util.Random; /** * This class will generate "uniform" lists of random integers. * * @author Daniel Lemire */ public class UniformDataGenerator { /** * construct generator of random arrays. */ public UniformDataGenerator() { this.rand = new Random(); } /** * @param seed random seed */ public UniformDataGenerator(final int seed) { this.rand = new Random(seed); } /** * generates randomly N distinct integers from 0 to Max. */ int[] generateUniformHash(int N, int Max) { if (N > Max) throw new RuntimeException("not possible"); int[] ans = new int[N]; HashSet s = new HashSet(); while (s.size() < N) s.add(new Integer(this.rand.nextInt(Max))); Iterator i = s.iterator(); for (int k = 0; k < N; ++k) ans[k] = i.next().intValue(); Arrays.sort(ans); return ans; } /** * output all integers from the range [0,Max) that are not * in the array */ static int[] negate(int[] x, int Max) { int[] ans = new int[Max - x.length]; int i = 0; int c = 0; for (int j = 0; j < x.length; ++j) { int v = x[j]; for (; i < v; ++i) ans[c++] = i; ++i; } while (c < ans.length) ans[c++] = i++; return ans; } /** * generates randomly N distinct integers from 0 to Max. * @param N Number of integers to generate * @param Max Maximum value of the integers * @return array containing random integers */ public int[] generateUniform(int N, int Max) { if(N * 2 > Max) { return negate( generateUniform(Max - N, Max), Max ); } if (2048 * N > Max) return generateUniformBitmap(N, Max); return generateUniformHash(N, Max); } /** * generates randomly N distinct integers from 0 to Max using a bitmap. 
* @param N Number of integers to generate * @param Max Maximum value of the integers * @return array containing random integers */ int[] generateUniformBitmap(int N, int Max) { if (N > Max) throw new RuntimeException("not possible"); int[] ans = new int[N]; BitSet bs = new BitSet(Max); int cardinality = 0; while (cardinality < N) { int v = this.rand.nextInt(Max); if (!bs.get(v)) { bs.set(v); cardinality++; } } int pos = 0; for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) { ans[pos++] = i; } return ans; } Random rand = new Random(); } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/000077500000000000000000000000001224043567000241205ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/BitCounter32.java000066400000000000000000000045001224043567000272050ustar00rootroot00000000000000package com.googlecode.javaewah32; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz * Licensed under the Apache License, Version 2.0. */ /** * BitCounter is a fake bitset data structure. Instead of storing the actual data, * it only records the number of set bits. * * @since 0.5.0 * @author Daniel Lemire and David McIntosh */ public final class BitCounter32 implements BitmapStorage32 { /** * Virtually add words directly to the bitmap * * @param newdata the word */ // @Override : causes problems with Java 1.5 @Override public void add(final int newdata) { this.oneBits += Integer.bitCount(newdata); } /** * virtually add several literal words. 
* * @param data the literal words * @param start the starting point in the array * @param number the number of literal words to add */ // @Override : causes problems with Java 1.5 @Override public void addStreamOfLiteralWords(int[] data, int start, int number) { for(int i=start;i iterator) { this.masteriterator = iterator; if(this.masteriterator.hasNext()) { this.iterator = this.masteriterator.next(); this.brlw = new BufferedRunningLengthWord32(this.iterator.next()); this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; this.buffer = this.iterator.buffer(); } } /** * Discard first words, iterating to the next running length word if needed. * * @param x the number of words to be discarded */ @Override public void discardFirstWords(int x) { while (x > 0) { if (this.brlw.RunningLength > x) { this.brlw.RunningLength -= x; return; } x -= this.brlw.RunningLength; this.brlw.RunningLength = 0; int toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; this.literalWordStartPosition += toDiscard; this.brlw.NumberOfLiteralWords -= toDiscard; x -= toDiscard; if ((x > 0) || (this.brlw.size() == 0)) { if (!this.next()) { break; } } } } /** * Move to the next RunningLengthWord * @return whether the move was possible */ @Override public boolean next() { if (!this.iterator.hasNext()) { if(!reload()) { this.brlw.NumberOfLiteralWords = 0; this.brlw.RunningLength = 0; return false; } } this.brlw.reset(this.iterator.next()); this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 return true; } private boolean reload() { if(!this.masteriterator.hasNext()) { return false; } this.iterator = this.masteriterator.next(); this.buffer = this.iterator.buffer(); return true; } /** * Get the nth literal word for the current running length word * @param index zero based index * @return the literal word */ @Override public int getLiteralWordAt(int index) { return 
this.buffer[this.literalWordStartPosition + index]; } /** * Gets the number of literal words for the current running length word. * * @return the number of literal words */ @Override public int getNumberOfLiteralWords() { return this.brlw.NumberOfLiteralWords; } /** * Gets the running bit. * * @return the running bit */ @Override public boolean getRunningBit() { return this.brlw.RunningBit; } /** * Gets the running length. * * @return the running length */ @Override public int getRunningLength() { return this.brlw.RunningLength; } /** * Size in uncompressed words of the current running length word. * * @return the size */ @Override public int size() { return this.brlw.size(); } @Override public BufferedIterator32 clone() throws CloneNotSupportedException { BufferedIterator32 answer = (BufferedIterator32) super.clone(); answer.brlw = this.brlw.clone(); answer.buffer = this.buffer; answer.iterator = this.iterator.clone(); answer.literalWordStartPosition = this.literalWordStartPosition; answer.masteriterator = this.masteriterator.clone(); return answer; } private BufferedRunningLengthWord32 brlw; private int[] buffer; private int literalWordStartPosition; private EWAHIterator32 iterator; private CloneableIterator masteriterator; }javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/BufferedRunningLengthWord32.java000066400000000000000000000101121224043567000322040ustar00rootroot00000000000000package com.googlecode.javaewah32; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. Similar to RunningLengthWord, but can * be modified without access to the array, and has faster access. * * @author Daniel Lemire * @since 0.5.0 * */ public final class BufferedRunningLengthWord32 implements Cloneable { /** * Instantiates a new buffered running length word. 
* * @param a the word */ public BufferedRunningLengthWord32(final int a) { this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits)); this.RunningBit = (a & 1) != 0; this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount); } /** * Instantiates a new buffered running length word. * * @param rlw the rlw */ public BufferedRunningLengthWord32(final RunningLengthWord32 rlw) { this(rlw.parent.buffer[rlw.position]); } /** * Discard first words. * * @param x the number of words to be discarded */ public void discardFirstWords(int x) { if (this.RunningLength >= x) { this.RunningLength -= x; return; } x -= this.RunningLength; this.RunningLength = 0; this.literalwordoffset += x; this.NumberOfLiteralWords -= x; } /** * Gets the number of literal words. * * @return the number of literal words */ public int getNumberOfLiteralWords() { return this.NumberOfLiteralWords; } /** * Gets the running bit. * * @return the running bit */ public boolean getRunningBit() { return this.RunningBit; } /** * Gets the running length. * * @return the running length */ public int getRunningLength() { return this.RunningLength; } /** * Reset the values using the provided word. * * @param a the word */ public void reset(final int a) { this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits)); this.RunningBit = (a & 1) != 0; this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount); this.literalwordoffset = 0; } /** * Reset the values of this running length word so that it has the same values * as the other running length word. * * @param rlw the other running length word */ public void reset(final RunningLengthWord32 rlw) { reset(rlw.parent.buffer[rlw.position]); } /** * Sets the number of literal words. * * @param number the new number of literal words */ public void setNumberOfLiteralWords(final int number) { this.NumberOfLiteralWords = number; } /** * Sets the running bit. 
* * @param b the new running bit */ public void setRunningBit(final boolean b) { this.RunningBit = b; } /** * Sets the running length. * * @param number the new running length */ public void setRunningLength(final int number) { this.RunningLength = number; } /** * Size in uncompressed words. * * @return the int */ public int size() { return this.RunningLength + this.NumberOfLiteralWords; } /* * @see java.lang.Object#toString() */ @Override public String toString() { return "running bit = " + getRunningBit() + " running length = " + getRunningLength() + " number of lit. words " + getNumberOfLiteralWords(); } @Override public BufferedRunningLengthWord32 clone() throws CloneNotSupportedException { BufferedRunningLengthWord32 answer = (BufferedRunningLengthWord32) super.clone(); answer.literalwordoffset = this.literalwordoffset; answer.NumberOfLiteralWords = this.NumberOfLiteralWords; answer.RunningBit = this.RunningBit; answer.RunningLength = this.RunningLength; return answer; } /** how many literal words have we read so far? */ public int literalwordoffset = 0; /** The Number of literal words. */ public int NumberOfLiteralWords; /** The Running bit. */ public boolean RunningBit; /** The Running length. */ public int RunningLength; }javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/EWAHCompressedBitmap32.java000066400000000000000000001516621224043567000310510ustar00rootroot00000000000000package com.googlecode.javaewah32; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ import java.util.*; import java.io.*; import com.googlecode.javaewah.IntIterator; import com.googlecode.javaewah.LogicalElement; /** *

* <p>
 * This implements the patent-free EWAH scheme. Roughly speaking, it is a 32-bit
 * variant of the BBC compression scheme used by Oracle for its bitmap indexes.
 * </p>
 *
 * <p>
 * In contrast with the 64-bit EWAH scheme (javaewah.EWAHCompressedBitmap), you
 * can expect this class to compress better, but to be slower at processing the
 * data. In effect, there is a trade-off between memory usage and performance.
 * </p>
 *
 * @see com.googlecode.javaewah.EWAHCompressedBitmap
 *
 * <p>
 * The objective of this compression type is to provide some compression,
 * while reducing as much as possible the CPU cycle usage.
 * </p>
 *
 * <p>
 * For more details, see the following paper:
 * </p>
 *
 * <ul>
 * <li>Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves
 * word-aligned bitmap indexes. Data &amp; Knowledge Engineering 69 (1), pages
 * 3-28, 2010. http://arxiv.org/abs/0901.3751</li>
 * </ul>
 *
*
 * @since 0.5.0
 */
public final class EWAHCompressedBitmap32 implements Cloneable, Externalizable,
    Iterable<Integer>, BitmapStorage32, LogicalElement<EWAHCompressedBitmap32> {

  /**
   * Creates an empty bitmap (no bit set to true).
   */
  public EWAHCompressedBitmap32() {
    this.buffer = new int[defaultbuffersize];
    this.rlw = new RunningLengthWord32(this, 0);
  }

  /**
   * Sets explicitly the buffer size (in 32-bit words). The initial memory usage
   * will be "buffersize * 32" bits. For large poorly compressible bitmaps, using
   * large values may improve performance.
   *
   * @param buffersize
   *          number of 32-bit words reserved when the object is created
   */
  public EWAHCompressedBitmap32(final int buffersize) {
    this.buffer = new int[buffersize];
    this.rlw = new RunningLengthWord32(this, 0);
  }

  /**
   * Adding words directly to the bitmap (for expert use).
   *
   * This is normally how you add data to the array. So you add bits in streams
   * of 4*8 bits.
   *
   * Example: if you add 321, you have added (in binary notation)
   * 0b101000001, so you have effectively called set(0), set(6), set(8)
   * in sequence.
   *
   * @param newdata
   *          the word
   */
  @Override
  public void add(final int newdata) {
    add(newdata, wordinbits);
  }

  /**
   * Adding words directly to the bitmap (for expert use).
   *
   * @param newdata
   *          the word
   * @param bitsthatmatter
   *          the number of significant bits (by default it should be 32)
   */
  public void add(final int newdata, final int bitsthatmatter) {
    this.sizeinbits += bitsthatmatter;
    if (newdata == 0) {
      // all bits clear: extend (or start) a run of zeros
      addEmptyWord(false);
    } else if (newdata == ~0) {
      // all bits set: extend (or start) a run of ones
      addEmptyWord(true);
    } else {
      addLiteralWord(newdata);
    }
  }

  /**
   * For internal use.
* * @param v * the boolean value * @return the storage cost of the addition */ private int addEmptyWord(final boolean v) { final boolean noliteralword = (this.rlw.getNumberOfLiteralWords() == 0); final int runlen = this.rlw.getRunningLength(); if ((noliteralword) && (runlen == 0)) { this.rlw.setRunningBit(v); } if ((noliteralword) && (this.rlw.getRunningBit() == v) && (runlen < RunningLengthWord32.largestrunninglengthcount)) { this.rlw.setRunningLength(runlen + 1); return 0; } push_back(0); this.rlw.position = this.actualsizeinwords - 1; this.rlw.setRunningBit(v); this.rlw.setRunningLength(1); return 1; } /** * For internal use. * * @param newdata * the literal word * @return the storage cost of the addition */ private int addLiteralWord(final int newdata) { final int numbersofar = this.rlw.getNumberOfLiteralWords(); if (numbersofar >= RunningLengthWord32.largestliteralcount) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; this.rlw.setNumberOfLiteralWords(1); push_back(newdata); return 2; } this.rlw.setNumberOfLiteralWords(numbersofar + 1); push_back(newdata); return 1; } /** * if you have several literal words to copy over, this might be faster. * * * @param data * the literal words * @param start * the starting point in the array * @param number * the number of literal words to add */ @Override public void addStreamOfLiteralWords(final int[] data, final int start, final int number) { int leftovernumber = number; while (leftovernumber > 0) { final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); final int whatwecanadd = leftovernumber < RunningLengthWord32.largestliteralcount - NumberOfLiteralWords ? 
leftovernumber : RunningLengthWord32.largestliteralcount - NumberOfLiteralWords; this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + whatwecanadd); leftovernumber -= whatwecanadd; push_back(data, start, whatwecanadd); this.sizeinbits += whatwecanadd * wordinbits; if (leftovernumber > 0) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; } } } /** * For experts: You want to add many zeroes or ones? This is the method you * use. * * @param v * the boolean value * @param number * the number */ @Override public void addStreamOfEmptyWords(final boolean v, int number) { if (number == 0) return; this.sizeinbits += number * wordinbits; if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { this.rlw.setRunningBit(v); } else if ((this.rlw.getNumberOfLiteralWords() != 0) || (this.rlw.getRunningBit() != v)) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); } final int runlen = this.rlw.getRunningLength(); final int whatwecanadd = number < RunningLengthWord32.largestrunninglengthcount - runlen ? number : RunningLengthWord32.largestrunninglengthcount - runlen; this.rlw.setRunningLength(runlen + whatwecanadd); number -= whatwecanadd; while (number >= RunningLengthWord32.largestrunninglengthcount) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); this.rlw.setRunningLength(RunningLengthWord32.largestrunninglengthcount); number -= RunningLengthWord32.largestrunninglengthcount; } if (number > 0) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); this.rlw.setRunningLength(number); } } /** * Same as addStreamOfLiteralWords, but the words are negated. 
* * @param data * the literal words * @param start * the starting point in the array * @param number * the number of literal words to add */ @Override public void addStreamOfNegatedLiteralWords(final int[] data, final int start, final int number) { int leftovernumber = number; while (leftovernumber > 0) { final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); final int whatwecanadd = leftovernumber < RunningLengthWord32.largestliteralcount - NumberOfLiteralWords ? leftovernumber : RunningLengthWord32.largestliteralcount - NumberOfLiteralWords; this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + whatwecanadd); leftovernumber -= whatwecanadd; negative_push_back(data, start, whatwecanadd); this.sizeinbits += whatwecanadd * wordinbits; if (leftovernumber > 0) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; } } } /** * Returns a new compressed bitmap containing the bitwise AND values of the * current bitmap with some other bitmap. * * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * * If you are not planning on adding to the resulting bitmap, you may call the trim() * method to reduce memory usage. * * @param a * the other bitmap * @return the EWAH compressed bitmap */ @Override public EWAHCompressedBitmap32 and(final EWAHCompressedBitmap32 a) { final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); container .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords : a.actualsizeinwords); andToContainer(a, container); return container; } /** * Computes new compressed bitmap containing the bitwise AND values of the * current bitmap with some other bitmap. * * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). 
*
   * The result is appended to the container through the BitmapStorage32
   * interface, so the container may be a real bitmap or a lightweight
   * collector such as a bit counter.
   *
   * @since 0.4.0
   * @param a
   *          the other bitmap
   * @param container
   *          where we store the result
   */
  public void andToContainer(final EWAHCompressedBitmap32 a,
      final BitmapStorage32 container) {
    // i walks over the words of "a", j over the words of this bitmap
    final EWAHIterator32 i = a.getEWAHIterator();
    final EWAHIterator32 j = getEWAHIterator();
    final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(
        i);
    final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(
        j);
    while ((rlwi.size()>0) && (rlwj.size()>0)) {
      // first consume runs for as long as either side has one pending
      while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
        // the "predator" is the side with the longer (or equal) run, the
        // "prey" is the other side
        final boolean i_is_prey = rlwi.getRunningLength() < rlwj
            .getRunningLength();
        final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi
            : rlwj;
        final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj
            : rlwi;
        if (predator.getRunningBit() == false) {
          // a run of zeros forces zeros in the result, whatever the prey holds
          container.addStreamOfEmptyWords(false, predator.getRunningLength());
          prey.discardFirstWords(predator.getRunningLength());
          predator.discardFirstWords(predator.getRunningLength());
        } else {
          // a run of ones lets the prey's words through unchanged.
          // NOTE(review): discharge is defined elsewhere; it presumably
          // writes up to that many prey words to the container and returns
          // how many it wrote - confirm. The shortfall is padded with zeros.
          final int index = prey.discharge(container,
              predator.getRunningLength());
          container.addStreamOfEmptyWords(false, predator.getRunningLength()
              - index);
          predator.discardFirstWords(predator.getRunningLength());
        }
      }
      // no run is pending: AND the literal words pairwise
      final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
          rlwj.getNumberOfLiteralWords());
      if (nbre_literal > 0) {
        for (int k = 0; k < nbre_literal; ++k)
          container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
        rlwi.discardFirstWords(nbre_literal);
        rlwj.discardFirstWords(nbre_literal);
      }
    }
    if (adjustContainerSizeWhenAggregating) {
      // one side may have words left; they cannot contribute set bits to an
      // AND. NOTE(review): dischargeAsEmpty is defined elsewhere; presumably
      // it pads the container with zero words for the leftover - confirm.
      final boolean i_remains = rlwi.size() > 0;
      final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi
          : rlwj;
      remaining.dischargeAsEmpty(container);
      // the result reports the larger of the two input sizes
      container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits()));
    }
  }

  /**
   * Returns the cardinality of the result of a bitwise AND of the values of the
   * current bitmap with some other bitmap. Avoids needing to allocate an
   * intermediate bitmap to hold the result of the AND.
   *
   * @param a
   *          the other bitmap
   * @return the cardinality
   */
  public int andCardinality(final EWAHCompressedBitmap32 a) {
    // the counter only tallies set bits instead of storing the words
    final BitCounter32 counter = new BitCounter32();
    andToContainer(a, counter);
    return counter.getCount();
  }

  /**
   * Returns a new compressed bitmap containing the bitwise AND NOT values of
   * the current bitmap with some other bitmap.
   *
   * The running time is proportional to the sum of the compressed sizes (as
   * reported by sizeInBytes()).
   *
   * If you are not planning on adding to the resulting bitmap, you may call the trim()
   * method to reduce memory usage.
*
   * @param a
   *          the other bitmap
   * @return the EWAH compressed bitmap
   */
  @Override
  public EWAHCompressedBitmap32 andNot(final EWAHCompressedBitmap32 a) {
    final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32();
    // pre-size the result using the larger of the two compressed sizes
    container
        .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords
            : a.actualsizeinwords);
    andNotToContainer(a, container);
    return container;
  }

  /**
   * Returns a new compressed bitmap containing the bitwise AND NOT values of
   * the current bitmap with some other bitmap.
   *
   * The running time is proportional to the sum of the compressed sizes (as
   * reported by sizeInBytes()).
   *
   * @param a the other bitmap
   * @param container where we store the result
   */
  public void andNotToContainer(final EWAHCompressedBitmap32 a,
      final BitmapStorage32 container) {
    // i walks over the words of this bitmap, j over the words of "a"
    final EWAHIterator32 i = getEWAHIterator();
    final EWAHIterator32 j = a.getEWAHIterator();
    final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(
        i);
    final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(
        j);
    while ((rlwi.size()>0) && (rlwj.size()>0)) {
      // first consume runs for as long as either side has one pending
      while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
        // the "predator" is the side with the longer (or equal) run, the
        // "prey" is the other side
        final boolean i_is_prey = rlwi.getRunningLength() < rlwj
            .getRunningLength();
        final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi
            : rlwj;
        final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj
            : rlwi;
        if (  ((predator.getRunningBit() == true) && (i_is_prey))
            || ((predator.getRunningBit() == false) && (!i_is_prey))){
          // either "a" has a run of ones (everything is masked out) or this
          // bitmap has a run of zeros (nothing to keep): the result is zeros
          container.addStreamOfEmptyWords(false, predator.getRunningLength());
          prey.discardFirstWords(predator.getRunningLength());
          predator.discardFirstWords(predator.getRunningLength());
        } else if (i_is_prey) {
          // "a" has a run of zeros: this bitmap's words pass through.
          // NOTE(review): discharge is defined elsewhere; presumably it
          // returns the number of words it wrote - confirm. The shortfall is
          // padded with zeros.
          int index = prey.discharge(container, predator.getRunningLength());
          container.addStreamOfEmptyWords(false, predator.getRunningLength()
              - index);
          predator.discardFirstWords(predator.getRunningLength());
        } else {
          // this bitmap has a run of ones: the result is the complement of
          // the words of "a"; the shortfall is padded with ones.
          // NOTE(review): dischargeNegated is defined elsewhere - confirm it
          // writes the negated prey words and returns the count written.
          int index = prey.dischargeNegated(container,
              predator.getRunningLength());
          container.addStreamOfEmptyWords(true, predator.getRunningLength()
              - index);
          predator.discardFirstWords(predator.getRunningLength());
        }
      }
      // no run is pending: combine the literal words pairwise
      final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
          rlwj.getNumberOfLiteralWords());
      if (nbre_literal > 0) {
        for (int k = 0; k < nbre_literal; ++k)
          container.add(rlwi.getLiteralWordAt(k) & (~rlwj.getLiteralWordAt(k)));
        rlwi.discardFirstWords(nbre_literal);
        rlwj.discardFirstWords(nbre_literal);
      }
    }
    final boolean i_remains = rlwi.size()>0;
    final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi
        : rlwj;
    // leftover words of this bitmap are kept as they are; leftover words of
    // "a" cannot contribute set bits and are only accounted for when the
    // container size is being adjusted
    if(i_remains)
      remaining.discharge(container);
    else if (adjustContainerSizeWhenAggregating)
      remaining.dischargeAsEmpty(container);
    if (adjustContainerSizeWhenAggregating)
      container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits()));
  }

  /**
   * Returns the cardinality of the result of a bitwise AND NOT of the values of
   * the current bitmap with some other bitmap. Avoids needing to allocate an
   * intermediate bitmap to hold the result of the AND NOT.
   *
   * @param a
   *          the other bitmap
   * @return the cardinality
   */
  public int andNotCardinality(final EWAHCompressedBitmap32 a) {
    // the counter only tallies set bits instead of storing the words
    final BitCounter32 counter = new BitCounter32();
    andNotToContainer(a, counter);
    return counter.getCount();
  }

  /**
   * reports the number of bits set to true. Running time is proportional to
   * compressed size (as reported by sizeInBytes).
* * @return the number of bits set to true */ public int cardinality() { int counter = 0; final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord32 localrlw = i.next(); if (localrlw.getRunningBit()) { counter += wordinbits * localrlw.getRunningLength(); } for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { counter += Integer.bitCount(i.buffer()[i.literalWords() + j]); } } return counter; } /** * Clear any set bits and set size in bits back to 0 */ public void clear() { this.sizeinbits = 0; this.actualsizeinwords = 1; this.rlw.position = 0; // buffer is not fully cleared but any new set operations should overwrite // stale data this.buffer[0] = 0; } /* * @see java.lang.Object#clone() */ @Override public EWAHCompressedBitmap32 clone() throws java.lang.CloneNotSupportedException { final EWAHCompressedBitmap32 clone = (EWAHCompressedBitmap32) super.clone(); clone.buffer = this.buffer.clone(); clone.actualsizeinwords = this.actualsizeinwords; clone.sizeinbits = this.sizeinbits; return clone; } /** * Deserialize. * * @param in * the DataInput stream * @throws IOException * Signals that an I/O exception has occurred. */ public void deserialize(DataInput in) throws IOException { this.sizeinbits = in.readInt(); this.actualsizeinwords = in.readInt(); if (this.buffer.length < this.actualsizeinwords) { this.buffer = new int[this.actualsizeinwords]; } for (int k = 0; k < this.actualsizeinwords; ++k) this.buffer[k] = in.readInt(); this.rlw = new RunningLengthWord32(this, in.readInt()); } /** * Check to see whether the two compressed bitmaps contain the same set bits. 
* * @see java.lang.Object#equals(java.lang.Object) */ @Override public boolean equals(Object o) { if (o instanceof EWAHCompressedBitmap32) { try { this.xorToContainer((EWAHCompressedBitmap32) o, new NonEmptyVirtualStorage32()); return true; } catch (NonEmptyVirtualStorage32.NonEmptyException e) { return false; } } return false; } /** * For experts: You want to add many zeroes or ones faster? * * This method does not update sizeinbits. * * @param v * the boolean value * @param number * the number (must be greater than 0) */ private void fastaddStreamOfEmptyWords(final boolean v, int number) { if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { this.rlw.setRunningBit(v); } else if ((this.rlw.getNumberOfLiteralWords() != 0) || (this.rlw.getRunningBit() != v)) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); } final int runlen = this.rlw.getRunningLength(); final int whatwecanadd = number < RunningLengthWord32.largestrunninglengthcount - runlen ? number : RunningLengthWord32.largestrunninglengthcount - runlen; this.rlw.setRunningLength(runlen + whatwecanadd); number -= whatwecanadd; while (number >= RunningLengthWord32.largestrunninglengthcount) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); this.rlw.setRunningLength(RunningLengthWord32.largestrunninglengthcount); number -= RunningLengthWord32.largestrunninglengthcount; } if (number > 0) { push_back(0); this.rlw.position = this.actualsizeinwords - 1; if (v) this.rlw.setRunningBit(v); this.rlw.setRunningLength(number); } } /** * Gets an EWAHIterator over the data. This is a customized iterator which * iterates over run length word. For experts only. 
* * @return the EWAHIterator */ public EWAHIterator32 getEWAHIterator() { return new EWAHIterator32(this, this.actualsizeinwords); } /** * @return the IteratingRLW iterator corresponding to this bitmap */ public IteratingRLW32 getIteratingRLW() { return new IteratingBufferedRunningLengthWord32(this); } /** * get the locations of the true values as one vector. (may use more memory * than iterator()) * * @return the positions */ public List getPositions() { final ArrayList v = new ArrayList(); final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); int pos = 0; while (i.hasNext()) { RunningLengthWord32 localrlw = i.next(); if (localrlw.getRunningBit()) { for (int j = 0; j < localrlw.getRunningLength(); ++j) { for (int c = 0; c < wordinbits; ++c) v.add(new Integer(pos++)); } } else { pos += wordinbits * localrlw.getRunningLength(); } for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { int data = i.buffer()[i.literalWords() + j]; while (data != 0) { final int ntz = Integer.numberOfTrailingZeros(data); data ^= (1 << ntz); v.add(new Integer(ntz + pos)); } pos += wordinbits; } } while ((v.size() > 0) && (v.get(v.size() - 1).intValue() >= this.sizeinbits)) v.remove(v.size() - 1); return v; } /** * Returns a customized hash code (based on Karp-Rabin). Naturally, if the * bitmaps are equal, they will hash to the same value. * */ @Override public int hashCode() { int karprabin = 0; final int B = 31; final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); while( i.hasNext() ) { i.next(); if (i.rlw.getRunningBit() == true) { karprabin += B * karprabin + i.rlw.getRunningLength(); } for (int k = 0; k < i.rlw.getNumberOfLiteralWords(); ++k) { karprabin += B * karprabin + this.buffer[k + i.literalWords()]; } } return karprabin; } /** * Return true if the two EWAHCompressedBitmap have both at least one true bit * in the same position. 
Equivalently, you could call "and" and check whether * there is a set bit, but intersects will run faster if you don't need the * result of the "and" operation. * * @param a * the other bitmap * @return whether they intersect */ public boolean intersects(final EWAHCompressedBitmap32 a) { NonEmptyVirtualStorage32 nevs = new NonEmptyVirtualStorage32(); try { this.andToContainer(a, nevs); } catch (NonEmptyVirtualStorage32.NonEmptyException nee) { return true; } return false; } /** * Iterator over the set bits (this is what most people will want to use to * browse the content if they want an iterator). The location of the set bits * is returned, in increasing order. * * @return the int iterator */ public IntIterator intIterator() { return new IntIteratorImpl32( new EWAHIterator32(this, this.actualsizeinwords)); } /** * iterate over the positions of the true values. This is similar to * intIterator(), but it uses Java generics. * * @return the iterator */ @Override public Iterator iterator() { return new Iterator() { @Override public boolean hasNext() { return this.under.hasNext(); } @Override public Integer next() { return new Integer(this.under.next()); } @Override public void remove() { throw new UnsupportedOperationException("bitsets do not support remove"); } final private IntIterator under = intIterator(); }; } /** * For internal use. 
* * @param data * the array of words to be added * @param start * the starting point * @param number * the number of words to add */ private void negative_push_back(final int[] data, final int start, final int number) { while (this.actualsizeinwords + number >= this.buffer.length) { final int oldbuffer[] = this.buffer; if(this.actualsizeinwords + number < 32768) this.buffer = new int[(this.actualsizeinwords + number) * 2]; else if ((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) this.buffer = new int[Integer.MAX_VALUE]; else this.buffer = new int[(this.actualsizeinwords + number) * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); this.rlw.parent.buffer = this.buffer; } for (int k = 0; k < number; ++k) this.buffer[this.actualsizeinwords + k] = ~data[start + k]; this.actualsizeinwords += number; } /** * Negate (bitwise) the current bitmap. To get a negated copy, do * EWAHCompressedBitmap x= ((EWAHCompressedBitmap) mybitmap.clone()); x.not(); * * The running time is proportional to the compressed size (as reported by * sizeInBytes()). 
* */ @Override public void not() { final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); if (!i.hasNext()) return; while (true) { final RunningLengthWord32 rlw1 = i.next(); rlw1.setRunningBit(!rlw1.getRunningBit()); for (int j = 0; j < rlw1.getNumberOfLiteralWords(); ++j) { i.buffer()[i.literalWords() + j] = ~i.buffer()[i.literalWords() + j]; } if (!i.hasNext()) {// must potentially adjust the last literal word final int usedbitsinlast = this.sizeinbits % wordinbits; if (usedbitsinlast == 0) return; if (rlw1.getNumberOfLiteralWords() == 0) { if((rlw1.getRunningLength()>0) && (rlw1.getRunningBit())) { rlw1.setRunningLength(rlw1.getRunningLength()-1); this.addLiteralWord((~0) >>> (wordinbits - usedbitsinlast)); } return; } i.buffer()[i.literalWords() + rlw1.getNumberOfLiteralWords() - 1] &= ((~0) >>> (wordinbits - usedbitsinlast)); return; } } } /** * Returns a new compressed bitmap containing the bitwise OR values of the * current bitmap with some other bitmap. * * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * * If you are not planning on adding to the resulting bitmap, you may call the trim() * method to reduce memory usage. * * @param a * the other bitmap * @return the EWAH compressed bitmap */ @Override public EWAHCompressedBitmap32 or(final EWAHCompressedBitmap32 a) { final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); container.reserve(this.actualsizeinwords + a.actualsizeinwords); orToContainer(a, container); return container; } /** * Computes the bitwise or between the current bitmap and the bitmap "a". * Stores the result in the container. 
*
   * The result is appended to the container through the BitmapStorage32
   * interface, so the container may be a real bitmap or a lightweight
   * collector such as a bit counter.
   *
   * @param a
   *          the other bitmap
   * @param container
   *          where we store the result
   */
  public void orToContainer(final EWAHCompressedBitmap32 a,
      final BitmapStorage32 container) {
    // i walks over the words of "a", j over the words of this bitmap
    final EWAHIterator32 i = a.getEWAHIterator();
    final EWAHIterator32 j = getEWAHIterator();
    final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(
        i);
    final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(
        j);
    while ((rlwi.size()>0) && (rlwj.size()>0)) {
      // first consume runs for as long as either side has one pending
      while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
        // the "predator" is the side with the longer (or equal) run, the
        // "prey" is the other side
        final boolean i_is_prey = rlwi.getRunningLength() < rlwj
            .getRunningLength();
        final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi
            : rlwj;
        final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj
            : rlwi;
        if (predator.getRunningBit() == true) {
          // a run of ones forces ones in the result, whatever the prey holds
          container.addStreamOfEmptyWords(true, predator.getRunningLength());
          prey.discardFirstWords(predator.getRunningLength());
          predator.discardFirstWords(predator.getRunningLength());
        } else {
          // a run of zeros lets the prey's words through unchanged.
          // NOTE(review): discharge is defined elsewhere; it presumably
          // writes up to that many prey words to the container and returns
          // how many it wrote - confirm. The shortfall is padded with zeros.
          int index = prey.discharge(container, predator.getRunningLength());
          container.addStreamOfEmptyWords(false, predator.getRunningLength()
              - index);
          predator.discardFirstWords(predator.getRunningLength());
        }
      }
      // no run is pending: OR the literal words pairwise
      final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
          rlwj.getNumberOfLiteralWords());
      if (nbre_literal > 0) {
        for (int k = 0; k < nbre_literal; ++k) {
          container.add(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k));
        }
        rlwi.discardFirstWords(nbre_literal);
        rlwj.discardFirstWords(nbre_literal);
      }
    }
    // whatever is left on the longer side is handed to the container
    final boolean i_remains = rlwi.size()>0;
    final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi
        : rlwj;
    remaining.discharge(container);
    // the result reports the larger of the two input sizes
    container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits()));
  }

  /**
   * Returns the cardinality of the result of a bitwise OR of the values of the
   * current bitmap with some other bitmap. Avoids needing to allocate an
   * intermediate bitmap to hold the result of the OR.
* * @param a * the other bitmap * @return the cardinality */ public int orCardinality(final EWAHCompressedBitmap32 a) { final BitCounter32 counter = new BitCounter32(); orToContainer(a, counter); return counter.getCount(); } /** * For internal use. * * @param data * the word to be added */ private void push_back(final int data) { if (this.actualsizeinwords == this.buffer.length) { final int oldbuffer[] = this.buffer; if(oldbuffer.length < 32768) this.buffer = new int[oldbuffer.length * 2]; else if (oldbuffer.length * 3 / 2 < oldbuffer.length) this.buffer = new int[Integer.MAX_VALUE]; else this.buffer = new int[oldbuffer.length * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); this.rlw.parent.buffer = this.buffer; } this.buffer[this.actualsizeinwords++] = data; } /** * For internal use. * * @param data * the array of words to be added * @param start * the starting point * @param number * the number of words to add */ private void push_back(final int[] data, final int start, final int number) { if (this.actualsizeinwords + number >= this.buffer.length) { final int oldbuffer[] = this.buffer; if(this.actualsizeinwords + number < 32768) this.buffer = new int[(this.actualsizeinwords + number) * 2]; else if((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) //overflow this.buffer = new int[Integer.MAX_VALUE]; else this.buffer = new int[(this.actualsizeinwords + number) * 3 / 2]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); this.rlw.parent.buffer = this.buffer; } System.arraycopy(data, start, this.buffer, this.actualsizeinwords, number); this.actualsizeinwords += number; } /* * @see java.io.Externalizable#readExternal(java.io.ObjectInput) */ @Override public void readExternal(ObjectInput in) throws IOException { deserialize(in); } /** * For internal use (trading off memory for speed). * * @param size * the number of words to allocate * @return True if the operation was a success. 
*/ private boolean reserve(final int size) { if (size > this.buffer.length) { final int oldbuffer[] = this.buffer; this.buffer = new int[size]; System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); this.rlw.parent.buffer = this.buffer; return true; } return false; } /** * Serialize. * * @param out * the DataOutput stream * @throws IOException * Signals that an I/O exception has occurred. */ public void serialize(DataOutput out) throws IOException { out.writeInt(this.sizeinbits); out.writeInt(this.actualsizeinwords); for (int k = 0; k < this.actualsizeinwords; ++k) out.writeInt(this.buffer[k]); out.writeInt(this.rlw.position); } /** * Report the size required to serialize this bitmap * * @return the size in bytes */ public int serializedSizeInBytes() { return this.sizeInBytes() + 3 * 4; } /** * Query the value of a single bit. Relying on this method when speed is * needed is discouraged. The complexity is linear with the size of the * bitmap. * * (This implementation is based on zhenjl's Go version of JavaEWAH.) * * @param i * the bit we are interested in * @return whether the bit is set to true */ public boolean get(final int i) { if ((i < 0) || (i >= this.sizeinbits)) return false; int WordChecked = 0; final IteratingRLW32 j = getIteratingRLW(); final int wordi = i / wordinbits; while (WordChecked <= wordi) { WordChecked += j.getRunningLength(); if (wordi < WordChecked) { return j.getRunningBit(); } if (wordi < WordChecked + j.getNumberOfLiteralWords()) { final int w = j.getLiteralWordAt(wordi - WordChecked); return (w & (1 << i)) != 0; } WordChecked += j.getNumberOfLiteralWords(); j.next(); } return false; } /** * Set the bit at position i to true, the bits must be set in (strictly) increasing * order. For example, set(15) and then set(7) will fail. You must do set(7) * and then set(15). * * @param i * the index * @return true if the value was set (always true when i is greater or equal to sizeInBits()). 
* @throws IndexOutOfBoundsException * if i is negative or greater than Integer.MAX_VALUE - 32 */ public boolean set(final int i) { if ((i > Integer.MAX_VALUE - wordinbits) || (i < 0)) throw new IndexOutOfBoundsException("Set values should be between 0 and " + (Integer.MAX_VALUE - wordinbits)); if (i < this.sizeinbits) return false; // distance in words: final int dist = (i + wordinbits) / wordinbits - (this.sizeinbits + wordinbits - 1) / wordinbits; this.sizeinbits = i + 1; if (dist > 0) {// easy if (dist > 1) fastaddStreamOfEmptyWords(false, dist - 1); addLiteralWord(1 << (i % wordinbits)); return true; } if (this.rlw.getNumberOfLiteralWords() == 0) { this.rlw.setRunningLength(this.rlw.getRunningLength() - 1); addLiteralWord(1 << (i % wordinbits)); return true; } this.buffer[this.actualsizeinwords - 1] |= 1 << (i % wordinbits); if (this.buffer[this.actualsizeinwords - 1] == ~0) { this.buffer[this.actualsizeinwords - 1] = 0; --this.actualsizeinwords; this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1); // next we add one clean word addEmptyWord(true); } return true; } /** * Set the size in bits. This does not change the compressed bitmap. * */ @Override public void setSizeInBits(final int size) { if((size+EWAHCompressedBitmap32.wordinbits-1)/EWAHCompressedBitmap32.wordinbits!= (this.sizeinbits+EWAHCompressedBitmap32.wordinbits-1)/EWAHCompressedBitmap32.wordinbits) throw new RuntimeException("You can only reduce the size of the bitmap within the scope of the last word. To extend the bitmap, please call setSizeInbits(int,boolean): "+size+" "+this.sizeinbits); this.sizeinbits = size; } /** * Change the reported size in bits of the *uncompressed* bitmap represented * by this compressed bitmap. It may change the underlying compressed bitmap. * It is not possible to reduce the sizeInBits, but * it can be extended. The new bits are set to false or true depending on the * value of defaultvalue. 
* * @param size * the size in bits * @param defaultvalue * the default boolean value * @return true if the update was possible */ public boolean setSizeInBits(final int size, final boolean defaultvalue) { if (size < this.sizeinbits) return false; if (defaultvalue == false) extendEmptyBits(this, this.sizeinbits, size); else { // next bit could be optimized while (((this.sizeinbits % wordinbits) != 0) && (this.sizeinbits < size)) { this.set(this.sizeinbits); } this.addStreamOfEmptyWords(defaultvalue, (size / wordinbits) - this.sizeinbits / wordinbits); // next bit could be optimized while (this.sizeinbits < size) { this.set(this.sizeinbits); } } this.sizeinbits = size; return true; } /** * Returns the size in bits of the *uncompressed* bitmap represented by this * compressed bitmap. Initially, the sizeInBits is zero. It is extended * automatically when you set bits to true. * * @return the size in bits */ @Override public int sizeInBits() { return this.sizeinbits; } /** * Report the *compressed* size of the bitmap (equivalent to memory usage, * after accounting for some overhead). * * @return the size in bytes */ @Override public int sizeInBytes() { return this.actualsizeinwords * (wordinbits / 8); } /** * Populate an array of (sorted integers) corresponding to the location of the * set bits. 
* * @return the array containing the location of the set bits */ public int[] toArray() { int[] ans = new int[this.cardinality()]; int inanspos = 0; int pos = 0; final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord32 localrlw = i.next(); if (localrlw.getRunningBit()) { for (int j = 0; j < localrlw.getRunningLength(); ++j) { for (int c = 0; c < wordinbits; ++c) { ans[inanspos++] = pos++; } } } else { pos += wordinbits * localrlw.getRunningLength(); } for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { int data = i.buffer()[i.literalWords() + j]; if (!usetrailingzeros) { for (int c = 0; c < wordinbits; ++c) { if ((data & (1 << c)) != 0) ans[inanspos++] = c + pos; } pos += wordinbits; } else { while (data != 0) { final int ntz = Integer.numberOfTrailingZeros(data); data ^= (1l << ntz); ans[inanspos++] = ntz + pos; } pos += wordinbits; } } } return ans; } /** * A more detailed string describing the bitmap (useful for debugging). * * @return the string */ public String toDebugString() { String ans = " EWAHCompressedBitmap, size in bits = " + this.sizeinbits + " size in words = " + this.actualsizeinwords + "\n"; final EWAHIterator32 i = new EWAHIterator32(this, this.actualsizeinwords); while (i.hasNext()) { RunningLengthWord32 localrlw = i.next(); if (localrlw.getRunningBit()) { ans += localrlw.getRunningLength() + " 1x11\n"; } else { ans += localrlw.getRunningLength() + " 0x00\n"; } ans += localrlw.getNumberOfLiteralWords() + " dirties\n"; for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { int data = i.buffer()[i.literalWords() + j]; ans += "\t" + data + "\n"; } } return ans; } /** * A string describing the bitmap. 
*
   * @return the string
   */
  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder();
    final IntIterator bits = this.intIterator();
    text.append("{");
    // first element without a separator, the rest preceded by a comma
    if (bits.hasNext()) {
      text.append(bits.next());
    }
    while (bits.hasNext()) {
      text.append(",");
      text.append(bits.next());
    }
    text.append("}");
    return text.toString();
  }

  /**
   * swap the content of the bitmap with another: the word buffer, the
   * position of the current running length word, the number of words in use
   * and the size in bits are exchanged.
   *
   * @param other
   *          bitmap to swap with
   */
  public void swap(final EWAHCompressedBitmap32 other) {
    final int[] myBuffer = this.buffer;
    this.buffer = other.buffer;
    other.buffer = myBuffer;

    final int myPosition = this.rlw.position;
    this.rlw.position = other.rlw.position;
    other.rlw.position = myPosition;

    final int myWords = this.actualsizeinwords;
    this.actualsizeinwords = other.actualsizeinwords;
    other.actualsizeinwords = myWords;

    final int myBits = this.sizeinbits;
    this.sizeinbits = other.sizeinbits;
    other.sizeinbits = myBits;
  }

  /**
   * Reduce the internal buffer to its minimal allowable size (given
   * by this.actualsizeinwords). This can free memory.
   */
  public void trim() {
    final int[] shrunk = Arrays.copyOf(this.buffer, this.actualsizeinwords);
    this.buffer = shrunk;
  }

  /*
   * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput)
   */
  @Override
  public void writeExternal(ObjectOutput out) throws IOException {
    serialize(out);
  }

  /**
   * Returns a new compressed bitmap containing the bitwise XOR values of the
   * current bitmap with some other bitmap.
   *
   * The running time is proportional to the sum of the compressed sizes (as
   * reported by sizeInBytes()).
   *
   * If you are not planning on adding to the resulting bitmap, you may call the trim()
   * method to reduce memory usage.
* * @param a * the other bitmap * @return the EWAH compressed bitmap */ @Override public EWAHCompressedBitmap32 xor(final EWAHCompressedBitmap32 a) { final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); container.reserve(this.actualsizeinwords + a.actualsizeinwords); xorToContainer(a, container); return container; } /** * Computes a new compressed bitmap containing the bitwise XOR values of the * current bitmap with some other bitmap. * * The running time is proportional to the sum of the compressed sizes (as * reported by sizeInBytes()). * * @param a * the other bitmap * @param container * where we store the result */ public void xorToContainer(final EWAHCompressedBitmap32 a, final BitmapStorage32 container) { final EWAHIterator32 i = a.getEWAHIterator(); final EWAHIterator32 j = getEWAHIterator(); final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(i); final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(j); while ((rlwi.size()>0) && (rlwj.size()>0)) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { final boolean i_is_prey = rlwi.getRunningLength() < rlwj .getRunningLength(); final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi : rlwj; final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? 
rlwj : rlwi; if (predator.getRunningBit() == false) { int index = prey.discharge(container, predator.getRunningLength()); container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } else { int index = prey.dischargeNegated(container, predator.getRunningLength()); container.addStreamOfEmptyWords(true, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } } final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (int k = 0; k < nbre_literal; ++k) container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); rlwi.discardFirstWords(nbre_literal); rlwj.discardFirstWords(nbre_literal); } } final boolean i_remains = rlwi.size()>0; final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi : rlwj; remaining.discharge(container); container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); } /** * Returns the cardinality of the result of a bitwise XOR of the values of the * current bitmap with some other bitmap. Avoids needing to allocate an * intermediate bitmap to hold the result of the OR. * * @param a * the other bitmap * @return the cardinality */ public int xorCardinality(final EWAHCompressedBitmap32 a) { final BitCounter32 counter = new BitCounter32(); xorToContainer(a, counter); return counter.getCount(); } /** * For internal use. Computes the bitwise and of the provided bitmaps and * stores the result in the container. * * @param container * where the result is stored * @param bitmaps * bitmaps to AND */ public static void andWithContainer(final BitmapStorage32 container, final EWAHCompressedBitmap32... 
bitmaps) { if(bitmaps.length == 1) throw new IllegalArgumentException("Need at least one bitmap"); if(bitmaps.length == 2) { bitmaps[0].andToContainer(bitmaps[1],container); return; } EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); EWAHCompressedBitmap32 tmp = new EWAHCompressedBitmap32(); bitmaps[0].andToContainer(bitmaps[1], answer); for(int k = 2; k < bitmaps.length - 1; ++k) { answer.andToContainer(bitmaps[k], tmp); tmp.swap(answer); tmp.clear(); } answer.andToContainer(bitmaps[bitmaps.length - 1], container); } /** * Returns a new compressed bitmap containing the bitwise AND values of the * provided bitmaps. * * It may or may not be faster than doing the aggregation two-by-two (A.and(B).and(C)). * * If only one bitmap is provided, it is returned as is. * * If you are not planning on adding to the resulting bitmap, you may call the trim() * method to reduce memory usage. * * @param bitmaps * bitmaps to AND together * @return result of the AND */ public static EWAHCompressedBitmap32 and( final EWAHCompressedBitmap32... bitmaps) { if(bitmaps.length == 1) return bitmaps[0]; if(bitmaps.length == 2) return bitmaps[0].and(bitmaps[1]); EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); EWAHCompressedBitmap32 tmp = new EWAHCompressedBitmap32(); bitmaps[0].andToContainer(bitmaps[1], answer); for(int k = 2; k < bitmaps.length; ++k) { answer.andToContainer(bitmaps[k], tmp); tmp.swap(answer); tmp.clear(); } return answer; } /** * Returns the cardinality of the result of a bitwise AND of the values of the * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold * the result of the AND. * * @param bitmaps * bitmaps to AND * @return the cardinality */ public static int andCardinality(final EWAHCompressedBitmap32... 
bitmaps) { if(bitmaps.length == 1) return bitmaps[0].cardinality(); final BitCounter32 counter = new BitCounter32(); andWithContainer(counter, bitmaps); return counter.getCount(); } /** * Return a bitmap with the bit set to true at the given * positions. The positions should be given in sorted order. * * (This is a convenience method.) * * @since 0.4.5 * @param setbits list of set bit positions * @return the bitmap */ public static EWAHCompressedBitmap32 bitmapOf(int ... setbits) { EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); for (int k : setbits) a.set(k); return a; } /** * For internal use. This simply adds a stream of words made of zeroes so that * we pad to the desired size. * * @param storage * bitmap to extend * @param currentSize * current size (in bits) * @param newSize * new desired size (in bits) */ private static void extendEmptyBits(final BitmapStorage32 storage, final int currentSize, final int newSize) { final int currentLeftover = currentSize % wordinbits; final int finalLeftover = newSize % wordinbits; storage.addStreamOfEmptyWords(false, (newSize / wordinbits) - currentSize / wordinbits + (finalLeftover != 0 ? 1 : 0) + (currentLeftover != 0 ? -1 : 0)); } /** * For internal use. Computes the bitwise or of the provided bitmaps and * stores the result in the container. * @param container where store the result * @param bitmaps to be aggregated */ public static void orWithContainer(final BitmapStorage32 container, final EWAHCompressedBitmap32... bitmaps) { if (bitmaps.length < 2) throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); int size = 0; int sinbits = 0; for (EWAHCompressedBitmap32 b : bitmaps) { size += b.sizeInBytes(); if (sinbits < b.sizeInBits()) sinbits = b.sizeInBits(); } if (size * 8 > sinbits) { FastAggregation32.bufferedorWithContainer(container, 65536, bitmaps); } else { FastAggregation32.orToContainer(container, bitmaps); } } /** * For internal use. 
Computes the bitwise xor of the provided bitmaps and * stores the result in the container. * @param container where store the result * @param bitmaps to be aggregated */ public static void xorWithContainer(final BitmapStorage32 container, final EWAHCompressedBitmap32... bitmaps) { if (bitmaps.length < 2) throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); int size = 0; int sinbits = 0; for (EWAHCompressedBitmap32 b : bitmaps) { size += b.sizeInBytes(); if (sinbits < b.sizeInBits()) sinbits = b.sizeInBits(); } if (size * 8 > sinbits) { FastAggregation32.bufferedxorWithContainer(container, 65536, bitmaps); } else { FastAggregation32.xorToContainer(container, bitmaps); } } /** * Returns a new compressed bitmap containing the bitwise OR values of the * provided bitmaps. This is typically faster than doing the aggregation * two-by-two (A.or(B).or(C).or(D)). * * If only one bitmap is provided, it is returned as is. * * If you are not planning on adding to the resulting bitmap, you may call the trim() * method to reduce memory usage. * * @param bitmaps * bitmaps to OR together * @return result of the OR */ public static EWAHCompressedBitmap32 or( final EWAHCompressedBitmap32... bitmaps) { if(bitmaps.length == 1) return bitmaps[0]; final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); int largestSize = 0; for (EWAHCompressedBitmap32 bitmap : bitmaps) { largestSize = Math.max(bitmap.actualsizeinwords, largestSize); } container.reserve((int) (largestSize * 1.5)); orWithContainer(container, bitmaps); return container; } /** * Returns a new compressed bitmap containing the bitwise XOR values of the * provided bitmaps. This is typically faster than doing the aggregation * two-by-two (A.xor(B).xor(C).xor(D)). * * If only one bitmap is provided, it is returned as is. * * If you are not planning on adding to the resulting bitmap, you may call the trim() * method to reduce memory usage. 
* * @param bitmaps * bitmaps to XOR together * @return result of the XOR */ public static EWAHCompressedBitmap32 xor( final EWAHCompressedBitmap32... bitmaps) { if(bitmaps.length == 1) return bitmaps[0]; final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); int largestSize = 0; for (EWAHCompressedBitmap32 bitmap : bitmaps) { largestSize = Math.max(bitmap.actualsizeinwords, largestSize); } container.reserve((int) (largestSize * 1.5)); xorWithContainer(container, bitmaps); return container; } /** * Returns the cardinality of the result of a bitwise OR of the values of the * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold * the result of the OR. * * @param bitmaps * bitmaps to OR * @return the cardinality */ public static int orCardinality(final EWAHCompressedBitmap32... bitmaps) { if(bitmaps.length == 1) return bitmaps[0].cardinality(); final BitCounter32 counter = new BitCounter32(); orWithContainer(counter, bitmaps); return counter.getCount(); } /** The actual size in words. */ int actualsizeinwords = 1; /** The buffer (array of 32-bit words) */ int buffer[] = null; /** The current (last) running length word. */ RunningLengthWord32 rlw = null; /** sizeinbits: number of bits in the (uncompressed) bitmap. */ int sizeinbits = 0; /** * The Constant defaultbuffersize: default memory allocation when the object * is constructed. */ static final int defaultbuffersize = 4; /** optimization option **/ public static final boolean usetrailingzeros = true; /** whether we adjust after some aggregation by adding in zeroes **/ public static final boolean adjustContainerSizeWhenAggregating = true; /** The Constant wordinbits represents the number of bits in a int. 
*/ public static final int wordinbits = 32; } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/EWAHIterator32.java000066400000000000000000000044351224043567000273740ustar00rootroot00000000000000package com.googlecode.javaewah32; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * The class EWAHIterator represents a special type of * efficient iterator iterating over (uncompressed) words of bits. * * @author Daniel Lemire * @since 0.5.0 * */ public final class EWAHIterator32 implements Cloneable { /** * Instantiates a new eWAH iterator. * * @param a the array of words * @param sizeinwords the number of words that are significant in the array of words */ public EWAHIterator32(final EWAHCompressedBitmap32 a, final int sizeinwords) { this.rlw = new RunningLengthWord32(a, 0); this.size = sizeinwords; this.pointer = 0; } /** * Allow expert developers to instantiate an EWAHIterator. * * @param bitmap we want to iterate over * @return an iterator */ public static EWAHIterator32 getEWAHIterator(EWAHCompressedBitmap32 bitmap) { return bitmap.getEWAHIterator(); } /** * Access to the array of words * * @return the int[] */ public int[] buffer() { return this.rlw.parent.buffer; } /** * Position of the literal words represented by this running length word. * * @return the int */ public int literalWords() { return this.pointer - this.rlw.getNumberOfLiteralWords(); } /** * Checks for next. * * @return true, if successful */ public boolean hasNext() { return this.pointer < this.size; } /** * Next running length word. 
* * @return the running length word */ public RunningLengthWord32 next() { this.rlw.position = this.pointer; this.pointer += this.rlw.getNumberOfLiteralWords() + 1; return this.rlw; } @Override public EWAHIterator32 clone() throws CloneNotSupportedException { EWAHIterator32 ans = (EWAHIterator32) super.clone(); ans.rlw = this.rlw.clone(); ans.size = this.size; ans.pointer = this.pointer; return ans; } /** The pointer represent the location of the current running length * word in the array of words (embedded in the rlw attribute). */ int pointer; /** The current running length word. */ RunningLengthWord32 rlw; /** The size in words. */ int size; } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/FastAggregation32.java000066400000000000000000000305231224043567000302000ustar00rootroot00000000000000package com.googlecode.javaewah32; import java.util.Arrays; import java.util.Comparator; import java.util.PriorityQueue; /** * Fast algorithms to aggregate many bitmaps. These algorithms are just given as * reference. They may not be faster than the corresponding methods in the * EWAHCompressedBitmap class. * * @author Daniel Lemire * */ public class FastAggregation32 { /** * Compute the and aggregate using a temporary uncompressed bitmap. * @param bitmaps the source bitmaps * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) * @return the or aggregate. */ public static EWAHCompressedBitmap32 bufferedand(final int bufsize, final EWAHCompressedBitmap32... bitmaps) { EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); bufferedandWithContainer(answer,bufsize, bitmaps); return answer; } /** * Compute the and aggregate using a temporary uncompressed bitmap. 
* * @param container where the aggregate is written * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) * @param bitmaps the source bitmaps */ public static void bufferedandWithContainer(final BitmapStorage32 container,final int bufsize, final EWAHCompressedBitmap32... bitmaps) { java.util.LinkedList al = new java.util.LinkedList(); for (EWAHCompressedBitmap32 bitmap : bitmaps) { al.add(new IteratingBufferedRunningLengthWord32(bitmap)); } int[] hardbitmap = new int[bufsize*bitmaps.length]; for(IteratingRLW32 i : al) if (i.size() == 0) { al.clear(); break; } while (!al.isEmpty()) { Arrays.fill(hardbitmap, ~0); int effective = Integer.MAX_VALUE; for(IteratingRLW32 i : al) { int eff = IteratorAggregation32.inplaceand(hardbitmap, i); if (eff < effective) effective = eff; } for (int k = 0; k < effective; ++k) container.add(hardbitmap[k]); for(IteratingRLW32 i : al) if (i.size() == 0) { al.clear(); break; } } } /** * Compute the or aggregate using a temporary uncompressed bitmap. * @param bitmaps the source bitmaps * @param bufsize buffer size used during the computation in 64-bit words * @return the or aggregate. */ public static EWAHCompressedBitmap32 bufferedor(final int bufsize, final EWAHCompressedBitmap32... bitmaps) { EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); bufferedorWithContainer(answer, bufsize, bitmaps); return answer; } /** * Compute the or aggregate using a temporary uncompressed bitmap. * * @param container where the aggregate is written * @param bufsize buffer size used during the computation in 64-bit words * @param bitmaps the source bitmaps */ public static void bufferedorWithContainer(final BitmapStorage32 container,final int bufsize, final EWAHCompressedBitmap32... 
bitmaps) { int range = 0; EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); Arrays.sort(sbitmaps, new Comparator() { @Override public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { return b.sizeinbits - a.sizeinbits; } }); java.util.ArrayList al = new java.util.ArrayList(); for (EWAHCompressedBitmap32 bitmap : sbitmaps) { if (bitmap.sizeinbits > range) range = bitmap.sizeinbits; al.add(new IteratingBufferedRunningLengthWord32(bitmap)); } int[] hardbitmap = new int[bufsize]; int maxr = al.size(); while (maxr > 0) { int effective = 0; for (int k = 0; k < maxr; ++k) { if (al.get(k).size() > 0) { int eff = IteratorAggregation32.inplaceor(hardbitmap, al.get(k)); if (eff > effective) effective = eff; } else maxr = k; } for (int k = 0; k < effective; ++k) container.add(hardbitmap[k]); Arrays.fill(hardbitmap, 0); } container.setSizeInBits(range); } /** * Compute the xor aggregate using a temporary uncompressed bitmap. * @param bitmaps the source bitmaps * @param bufsize buffer size used during the computation in 64-bit words * @return the xor aggregate. */ public static EWAHCompressedBitmap32 bufferedxor(final int bufsize, final EWAHCompressedBitmap32... bitmaps) { EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); bufferedxorWithContainer(answer, bufsize, bitmaps); return answer; } /** * Compute the xor aggregate using a temporary uncompressed bitmap. * * @param container where the aggregate is written * @param bufsize buffer size used during the computation in 64-bit words * @param bitmaps the source bitmaps */ public static void bufferedxorWithContainer(final BitmapStorage32 container,final int bufsize, final EWAHCompressedBitmap32... 
bitmaps) { int range = 0; EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); Arrays.sort(sbitmaps, new Comparator() { @Override public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { return b.sizeinbits - a.sizeinbits; } }); java.util.ArrayList al = new java.util.ArrayList(); for (EWAHCompressedBitmap32 bitmap : sbitmaps) { if (bitmap.sizeinbits > range) range = bitmap.sizeinbits; al.add(new IteratingBufferedRunningLengthWord32(bitmap)); } int[] hardbitmap = new int[bufsize]; int maxr = al.size(); while (maxr > 0) { int effective = 0; for (int k = 0; k < maxr; ++k) { if (al.get(k).size() > 0) { int eff = IteratorAggregation32.inplacexor(hardbitmap, al.get(k)); if (eff > effective) effective = eff; } else maxr = k; } for (int k = 0; k < effective; ++k) container.add(hardbitmap[k]); Arrays.fill(hardbitmap, 0); } container.setSizeInBits(range); } /** * Uses a priority queue to compute the or aggregate. * @param container where we write the result * @param bitmaps to be aggregated */ public static void orToContainer(final BitmapStorage32 container, final EWAHCompressedBitmap32 ... bitmaps) { if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); PriorityQueue pq = new PriorityQueue(bitmaps.length, new Comparator() { @Override public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { return a.sizeInBytes() - b.sizeInBytes(); } }); for (EWAHCompressedBitmap32 x : bitmaps) { pq.add(x); } while (pq.size() > 2) { EWAHCompressedBitmap32 x1 = pq.poll(); EWAHCompressedBitmap32 x2 = pq.poll(); pq.add(x1.or(x2)); } pq.poll().orToContainer(pq.poll(), container); } /** * Uses a priority queue to compute the xor aggregate. * @param container where we write the result * @param bitmaps to be aggregated */ public static void xorToContainer(final BitmapStorage32 container, final EWAHCompressedBitmap32 ... 
bitmaps) { if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); PriorityQueue pq = new PriorityQueue(bitmaps.length, new Comparator() { @Override public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { return a.sizeInBytes() - b.sizeInBytes(); } }); for (EWAHCompressedBitmap32 x : bitmaps) { pq.add(x); } while (pq.size() > 2) { EWAHCompressedBitmap32 x1 = pq.poll(); EWAHCompressedBitmap32 x2 = pq.poll(); pq.add(x1.xor(x2)); } pq.poll().xorToContainer(pq.poll(), container); } /** * For internal use. Computes the bitwise or of the provided bitmaps and * stores the result in the container. (This used to be the default.) * * @deprecated use EWAHCompressedBitmap32.or instead * @since 0.4.0 * @param container where store the result * @param bitmaps to be aggregated */ @Deprecated public static void legacy_orWithContainer(final BitmapStorage32 container, final EWAHCompressedBitmap32... bitmaps) { if (bitmaps.length == 2) { // should be more efficient bitmaps[0].orToContainer(bitmaps[1], container); return; } // Sort the bitmaps in descending order by sizeinbits. We will exhaust the // sorted bitmaps from right to left. final EWAHCompressedBitmap32[] sortedBitmaps = bitmaps.clone(); Arrays.sort(sortedBitmaps, new Comparator() { @Override public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { return a.sizeinbits < b.sizeinbits ? 1 : a.sizeinbits == b.sizeinbits ? 0 : -1; } }); final IteratingBufferedRunningLengthWord32[] rlws = new IteratingBufferedRunningLengthWord32[bitmaps.length]; int maxAvailablePos = 0; for (EWAHCompressedBitmap32 bitmap : sortedBitmaps) { EWAHIterator32 iterator = bitmap.getEWAHIterator(); if (iterator.hasNext()) { rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord32( iterator); } } if (maxAvailablePos == 0) { // this never happens... 
container.setSizeInBits(0); return; } int maxSize = sortedBitmaps[0].sizeinbits; while (true) { int maxOneRl = 0; int minZeroRl = Integer.MAX_VALUE; int minSize = Integer.MAX_VALUE; int numEmptyRl = 0; for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord32 rlw = rlws[i]; int size = rlw.size(); if (size == 0) { maxAvailablePos = i; break; } minSize = Math.min(minSize, size); if (rlw.getRunningBit()) { int rl = rlw.getRunningLength(); maxOneRl = Math.max(maxOneRl, rl); minZeroRl = 0; if (rl == 0 && size > 0) { numEmptyRl++; } } else { int rl = rlw.getRunningLength(); minZeroRl = Math.min(minZeroRl, rl); if (rl == 0 && size > 0) { numEmptyRl++; } } } if (maxAvailablePos == 0) { break; } else if (maxAvailablePos == 1) { // only one bitmap is left so just write the rest of it out rlws[0].discharge(container); break; } if (maxOneRl > 0) { container.addStreamOfEmptyWords(true, maxOneRl); for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord32 rlw = rlws[i]; rlw.discardFirstWords(maxOneRl); } } else if (minZeroRl > 0) { container.addStreamOfEmptyWords(false, minZeroRl); for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord32 rlw = rlws[i]; rlw.discardFirstWords(minZeroRl); } } else { int index = 0; if (numEmptyRl == 1) { // if one rlw has literal words to process and the rest have a run of // 0's we can write them out here IteratingBufferedRunningLengthWord32 emptyRl = null; int minNonEmptyRl = Integer.MAX_VALUE; for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord32 rlw = rlws[i]; int rl = rlw.getRunningLength(); if (rl == 0) { assert emptyRl == null; emptyRl = rlw; } else { minNonEmptyRl = Math.min(minNonEmptyRl, rl); } } int wordsToWrite = minNonEmptyRl > minSize ? 
minSize : minNonEmptyRl; if (emptyRl != null) emptyRl.writeLiteralWords(wordsToWrite, container); index += wordsToWrite; } while (index < minSize) { int word = 0; for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord32 rlw = rlws[i]; if (rlw.getRunningLength() <= index) { word |= rlw.getLiteralWordAt(index - rlw.getRunningLength()); } } container.add(word); index++; } for (int i = 0; i < maxAvailablePos; i++) { IteratingBufferedRunningLengthWord32 rlw = rlws[i]; rlw.discardFirstWords(minSize); } } } container.setSizeInBits(maxSize); } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/IntIteratorImpl32.java000066400000000000000000000043031224043567000302160ustar00rootroot00000000000000package com.googlecode.javaewah32; /* * Copyright 2012, Google Inc. * Licensed under the Apache License, Version 2.0. */ import static com.googlecode.javaewah32.EWAHCompressedBitmap32.wordinbits; import com.googlecode.javaewah.IntIterator; /** * The IntIteratorImpl32 is the 32 bit implementation of the IntIterator * interface, which efficiently returns the stream of integers represented by an * EWAHIterator32. 
* * @author Colby Ranger * @since 0.5.6 */ final class IntIteratorImpl32 implements IntIterator { private final EWAHIterator32 ewahIter; private final int[] ewahBuffer; private int position; private int runningLength; private int word; private int wordPosition; private int wordLength; private int literalPosition; private boolean hasnext; IntIteratorImpl32(EWAHIterator32 ewahIter) { this.ewahIter = ewahIter; this.ewahBuffer = ewahIter.buffer(); this.hasnext = this.moveToNext(); } public final boolean moveToNext() { while (!runningHasNext() && !literalHasNext()) { if (!this.ewahIter.hasNext()) { return false; } setRunningLengthWord(this.ewahIter.next()); } return true; } @Override public final boolean hasNext() { return this.hasnext; } @Override public final int next() { final int answer; if (runningHasNext()) { answer = this.position++; } else { final int bit = Long.numberOfTrailingZeros(this.word); this.word ^= (1l << bit); answer = this.literalPosition + bit; } this.hasnext = this.moveToNext(); return answer; } private final void setRunningLengthWord(RunningLengthWord32 rlw) { this.runningLength = wordinbits * rlw.getRunningLength() + this.position; if (!rlw.getRunningBit()) { this.position = this.runningLength; } this.wordPosition = this.ewahIter.literalWords(); this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); } private final boolean runningHasNext() { return this.position < this.runningLength; } private final boolean literalHasNext() { while (this.word == 0 && this.wordPosition < this.wordLength) { this.word = this.ewahBuffer[this.wordPosition++]; this.literalPosition = this.position; this.position += wordinbits; } return this.word != 0; } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java000066400000000000000000000057441224043567000324760ustar00rootroot00000000000000package com.googlecode.javaewah32; import static com.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; import 
com.googlecode.javaewah.IntIterator; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Implementation of an IntIterator over an IteratingRLW. * * */ public class IntIteratorOverIteratingRLW32 implements IntIterator { IteratingRLW32 parent; private int position; private int runningLength; private int word; private int wordPosition; private int wordLength; private int literalPosition; private boolean hasnext; /** * @param p iterator we wish to iterate over */ public IntIteratorOverIteratingRLW32(final IteratingRLW32 p) { this.parent = p; this.position = 0; setupForCurrentRunningLengthWord(); this.hasnext = moveToNext(); } /** * @return whether we could find another set bit; don't move if there is an unprocessed value */ private final boolean moveToNext() { while (!runningHasNext() && !literalHasNext()) { if (this.parent.next()) setupForCurrentRunningLengthWord(); else return false; } return true; } @Override public boolean hasNext() { return this.hasnext; } @Override public final int next() { final int answer; if (runningHasNext()) { answer = this.position++; } else { final int bit = Long.numberOfTrailingZeros(this.word); this.word ^= (1l << bit); answer = this.literalPosition + bit; } this.hasnext = this.moveToNext(); return answer; } private final void setupForCurrentRunningLengthWord() { this.runningLength = wordinbits * this.parent.getRunningLength() + this.position; if (!this.parent.getRunningBit()) { this.position = this.runningLength; } this.wordPosition = 0; this.wordLength = this.parent.getNumberOfLiteralWords(); } private final boolean runningHasNext() { return this.position < this.runningLength; } private final boolean literalHasNext() { while (this.word == 0 && this.wordPosition < this.wordLength) { this.word = this.parent.getLiteralWordAt(this.wordPosition++); this.literalPosition = this.position; this.position += 
wordinbits; } return this.word != 0; } } IteratingBufferedRunningLengthWord32.java000066400000000000000000000175611224043567000340130ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32package com.googlecode.javaewah32; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. Similar to BufferedRunningLengthWord32, but automatically * advances to the next BufferedRunningLengthWord32 as words are discarded. * * @since 0.5.0 * @author Daniel Lemire and David McIntosh */ public final class IteratingBufferedRunningLengthWord32 implements IteratingRLW32, Cloneable { /** * Instantiates a new iterating buffered running length word. * * @param iterator iterator */ public IteratingBufferedRunningLengthWord32(final EWAHIterator32 iterator) { this.iterator = iterator; this.brlw = new BufferedRunningLengthWord32(this.iterator.next()); this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; this.buffer = this.iterator.buffer(); } /** * Instantiates a new iterating buffered running length word. * @param bitmap over which we want to iterate * */ public IteratingBufferedRunningLengthWord32(final EWAHCompressedBitmap32 bitmap) { this(EWAHIterator32.getEWAHIterator(bitmap)); } /** * Discard first words, iterating to the next running length word if needed. * * @param x the x */ @Override public void discardFirstWords(int x) { while (x > 0) { if (this.brlw.RunningLength > x) { this.brlw.RunningLength -= x; return; } x -= this.brlw.RunningLength; this.brlw.RunningLength = 0; int toDiscard = x > this.brlw.NumberOfLiteralWords ? 
this.brlw.NumberOfLiteralWords : x; this.literalWordStartPosition += toDiscard; this.brlw.NumberOfLiteralWords -= toDiscard; x -= toDiscard; if ((x > 0) || (this.brlw.size() == 0)) { if (!this.iterator.hasNext()) { break; } this.brlw.reset(this.iterator.next()); this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset == 0; } } } /** * Write out up to max words, returns how many were written * @param container target for writes * @param max maximal number of writes * @return how many written */ public int discharge(BitmapStorage32 container, int max) { int index = 0; while ((index < max) && (size() > 0)) { // first run int pl = getRunningLength(); if (index + pl > max) { pl = max - index; } container.addStreamOfEmptyWords(getRunningBit(), pl); index += pl; int pd = getNumberOfLiteralWords(); if (pd + index > max) { pd = max - index; } writeLiteralWords(pd, container); discardFirstWords(pl+pd); index += pd; } return index; } /** * Write out up to max words (negated), returns how many were written * @param container target for writes * @param max maximal number of writes * @return how many written */ public int dischargeNegated(BitmapStorage32 container, int max) { int index = 0; while ((index < max) && (size() > 0)) { // first run int pl = getRunningLength(); if (index + pl > max) { pl = max - index; } container.addStreamOfEmptyWords(!getRunningBit(), pl); index += pl; int pd = getNumberOfLiteralWords(); if (pd + index > max) { pd = max - index; } writeNegatedLiteralWords(pd, container); discardFirstWords(pl+pd); index += pd; } return index; } /** * Move to the next RunningLengthWord * @return whether the move was possible */ @Override public boolean next() { if (!this.iterator.hasNext()) { this.brlw.NumberOfLiteralWords = 0; this.brlw.RunningLength = 0; return false; } this.brlw.reset(this.iterator.next()); this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 return true; } /** * 
Write out the remain words, transforming them to zeroes. * @param container target for writes */ public void dischargeAsEmpty(BitmapStorage32 container) { while(size()>0) { container.addStreamOfEmptyWords(false, size()); discardFirstWords(size()); } } /** * Write out the remaining words * @param container target for writes */ public void discharge(BitmapStorage32 container) { // fix the offset this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords(); discharge(this.brlw, this.iterator, container); } /** * Get the nth literal word for the current running length word * @param index zero based index * @return the literal word */ @Override public int getLiteralWordAt(int index) { return this.buffer[this.literalWordStartPosition + index]; } /** * Gets the number of literal words for the current running length word. * * @return the number of literal words */ @Override public int getNumberOfLiteralWords() { return this.brlw.NumberOfLiteralWords; } /** * Gets the running bit. * * @return the running bit */ @Override public boolean getRunningBit() { return this.brlw.RunningBit; } /** * Gets the running length. * * @return the running length */ @Override public int getRunningLength() { return this.brlw.RunningLength; } /** * Size in uncompressed words of the current running length word. * * @return the int */ @Override public int size() { return this.brlw.size(); } /** * write the first N literal words to the target bitmap. Does not discard the words or perform iteration. * @param numWords number of words to be written * @param container where we write the data */ public void writeLiteralWords(int numWords, BitmapStorage32 container) { container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); } /** * write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. 
* @param numWords number of words to be written * @param container where we write the data */ public void writeNegatedLiteralWords(int numWords, BitmapStorage32 container) { container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); } /** * For internal use. (One could use the non-static discharge method instead, * but we expect them to be slower.) * * @param initialWord * the initial word * @param iterator * the iterator * @param container * the container */ protected static void discharge( final BufferedRunningLengthWord32 initialWord, final EWAHIterator32 iterator, final BitmapStorage32 container) { BufferedRunningLengthWord32 runningLengthWord = initialWord; for (;;) { final int runningLength = runningLengthWord.getRunningLength(); container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(), runningLength); container.addStreamOfLiteralWords(iterator.buffer(), iterator.literalWords() + runningLengthWord.literalwordoffset, runningLengthWord.getNumberOfLiteralWords()); if (!iterator.hasNext()) break; runningLengthWord = new BufferedRunningLengthWord32(iterator.next()); } } @Override public IteratingBufferedRunningLengthWord32 clone() throws CloneNotSupportedException { IteratingBufferedRunningLengthWord32 answer = (IteratingBufferedRunningLengthWord32) super.clone(); answer.brlw = this.brlw.clone(); answer.buffer = this.buffer; answer.iterator = this.iterator.clone(); answer.literalWordStartPosition = this.literalWordStartPosition; return answer; } private BufferedRunningLengthWord32 brlw; private int[] buffer; private int literalWordStartPosition; private EWAHIterator32 iterator; } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/IteratingRLW32.java000066400000000000000000000017541224043567000274520ustar00rootroot00000000000000package com.googlecode.javaewah32; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the 
Apache License, Version 2.0. */ /** * High-level iterator over a compressed bitmap. * */ public interface IteratingRLW32 { /** * @return whether there is more */ public boolean next() ; /** * @param index where the literal word is * @return the literal word at the given index. */ public int getLiteralWordAt(int index); /** * @return the number of literal (non-fill) words */ public int getNumberOfLiteralWords() ; /** * @return the bit used for the fill bits */ public boolean getRunningBit() ; /** * @return sum of getRunningLength() and getNumberOfLiteralWords() */ public int size() ; /** * @return length of the run of fill words */ public int getRunningLength() ; /** * @param x the number of words to discard */ public void discardFirstWords(int x); } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/IteratorAggregation32.java000066400000000000000000000426331224043567000311010ustar00rootroot00000000000000package com.googlecode.javaewah32; import java.util.Arrays; import java.util.Iterator; import java.util.LinkedList; import com.googlecode.javaewah.CloneableIterator; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Set of helper functions to aggregate bitmaps. * */ public class IteratorAggregation32 { /** * @param x iterator to negate * @return negated version of the iterator */ public static IteratingRLW32 not(final IteratingRLW32 x) { return new IteratingRLW32() { @Override public boolean next() { return x.next(); } @Override public int getLiteralWordAt(int index) { return ~x.getLiteralWordAt(index); } @Override public int getNumberOfLiteralWords() { return x.getNumberOfLiteralWords(); } @Override public boolean getRunningBit() { return ! 
x.getRunningBit(); } @Override public int size() { return x.size(); } @Override public int getRunningLength() { return x.getRunningLength(); } @Override public void discardFirstWords(int y) { x.discardFirstWords(y); } }; } /** * Aggregate the iterators using a bitmap buffer. * * @param al iterators to aggregate * @return and aggregate */ public static IteratingRLW32 bufferedand(final IteratingRLW32... al) { return bufferedand (DEFAULTMAXBUFSIZE,al); } /** * Aggregate the iterators using a bitmap buffer. * * @param al iterators to aggregate * @param bufsize size of the internal buffer used by the iterator in 64-bit words * @return and aggregate */ public static IteratingRLW32 bufferedand(final int bufsize, final IteratingRLW32... al) { if (al.length == 0) throw new IllegalArgumentException("Need at least one iterator"); if (al.length == 1) return al[0]; final LinkedList basell = new LinkedList(); for (IteratingRLW32 i : al) basell.add(i); return new BufferedIterator32(new AndIt(basell,bufsize)); } /** * Aggregate the iterators using a bitmap buffer. * * @param al iterators to aggregate * @return or aggregate */ public static IteratingRLW32 bufferedor(final IteratingRLW32... al) { return bufferedor(DEFAULTMAXBUFSIZE,al); } /** * Aggregate the iterators using a bitmap buffer. * * @param al iterators to aggregate * @param bufsize size of the internal buffer used by the iterator in 64-bit words * @return or aggregate */ public static IteratingRLW32 bufferedor(final int bufsize, final IteratingRLW32... al) { if (al.length == 0) throw new IllegalArgumentException("Need at least one iterator"); if (al.length == 1) return al[0]; final LinkedList basell = new LinkedList(); for (IteratingRLW32 i : al) basell.add(i); return new BufferedIterator32(new ORIt(basell,bufsize)); } /** * Aggregate the iterators using a bitmap buffer. * * @param al iterators to aggregate * @return xor aggregate */ public static IteratingRLW32 bufferedxor(final IteratingRLW32... 
al) { return bufferedxor (DEFAULTMAXBUFSIZE,al); } /** * Aggregate the iterators using a bitmap buffer. * * @param al iterators to aggregate * @param bufsize size of the internal buffer used by the iterator in 64-bit words * @return xor aggregate */ public static IteratingRLW32 bufferedxor(final int bufsize, final IteratingRLW32... al) { if (al.length == 0) throw new IllegalArgumentException("Need at least one iterator"); if (al.length == 1) return al[0]; final LinkedList basell = new LinkedList(); for (IteratingRLW32 i : al) basell.add(i); return new BufferedIterator32(new XORIt(basell,bufsize)); } /** * Write out the content of the iterator, but as if it were all zeros. * * @param container * where we write * @param i * the iterator */ protected static void dischargeAsEmpty(final BitmapStorage32 container, final IteratingRLW32 i) { while (i.size() > 0) { container.addStreamOfEmptyWords(false, i.size()); i.next(); } } /** * Write out up to max words, returns how many were written * @param container target for writes * @param i source of data * @param max maximal number of writes * @return how many written */ protected static int discharge(final BitmapStorage32 container, IteratingRLW32 i, int max) { int counter = 0; while (i.size() > 0 && counter < max) { int L1 = i.getRunningLength(); if (L1 > 0) { if (L1 + counter > max) L1 = max - counter; container.addStreamOfEmptyWords(i.getRunningBit(), L1); counter += L1; } int L = i.getNumberOfLiteralWords(); if(L + counter > max) L = max - counter; for (int k = 0; k < L; ++k) { container.add(i.getLiteralWordAt(k)); } counter += L; i.discardFirstWords(L+L1); } return counter; } /** * Write out up to max negated words, returns how many were written * @param container target for writes * @param i source of data * @param max maximal number of writes * @return how many written */ protected static int dischargeNegated(final BitmapStorage32 container, IteratingRLW32 i, int max) { int counter = 0; while (i.size() > 0 && counter < 
max) { int L1 = i.getRunningLength(); if (L1 > 0) { if (L1 + counter > max) L1 = max - counter; container.addStreamOfEmptyWords(i.getRunningBit(), L1); counter += L1; } int L = i.getNumberOfLiteralWords(); if(L + counter > max) L = max - counter; for (int k = 0; k < L; ++k) { container.add(i.getLiteralWordAt(k)); } counter += L; i.discardFirstWords(L+L1); } return counter; } static void andToContainer(final BitmapStorage32 container, int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { final boolean i_is_prey = rlwi.getRunningLength() < rlwj .getRunningLength(); final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; final IteratingRLW32 predator = i_is_prey ? rlwj : rlwi; if (predator.getRunningBit() == false) { container.addStreamOfEmptyWords(false, predator.getRunningLength()); prey.discardFirstWords(predator.getRunningLength()); predator.discardFirstWords(predator.getRunningLength()); } else { final int index = discharge(container, prey, predator.getRunningLength()); container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } } final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { desiredrlwcount -= nbre_literal; for (int k = 0; k < nbre_literal; ++k) container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); rlwi.discardFirstWords(nbre_literal); rlwj.discardFirstWords(nbre_literal); } } } static void andToContainer(final BitmapStorage32 container, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { while ((rlwi.size()>0) && (rlwj.size()>0) ) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { final boolean i_is_prey = rlwi.getRunningLength() < rlwj .getRunningLength(); final IteratingRLW32 prey = i_is_prey ? 
rlwi : rlwj; final IteratingRLW32 predator = i_is_prey ? rlwj : rlwi; if (predator.getRunningBit() == false) { container.addStreamOfEmptyWords(false, predator.getRunningLength()); prey.discardFirstWords(predator.getRunningLength()); predator.discardFirstWords(predator.getRunningLength()); } else { final int index = discharge(container, prey, predator.getRunningLength()); container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } } final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { for (int k = 0; k < nbre_literal; ++k) container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); rlwi.discardFirstWords(nbre_literal); rlwj.discardFirstWords(nbre_literal); } } } /** * Compute the first few words of the XOR aggregate between two iterators. * * @param container where to write * @param desiredrlwcount number of words to be written (max) * @param rlwi first iterator to aggregate * @param rlwj second iterator to aggregate */ public static void xorToContainer(final BitmapStorage32 container, int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { final boolean i_is_prey = rlwi.getRunningLength() < rlwj .getRunningLength(); final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; final IteratingRLW32 predator = i_is_prey ? 
rlwj : rlwi; if (predator.getRunningBit() == false) { int index = discharge(container, prey, predator.getRunningLength()); container.addStreamOfEmptyWords(false, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } else { int index = dischargeNegated(container, prey, predator.getRunningLength()); container.addStreamOfEmptyWords(true, predator.getRunningLength() - index); predator.discardFirstWords(predator.getRunningLength()); } } final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords()); if (nbre_literal > 0) { desiredrlwcount -= nbre_literal; for (int k = 0; k < nbre_literal; ++k) container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); rlwi.discardFirstWords(nbre_literal); rlwj.discardFirstWords(nbre_literal); } } } protected static int inplaceor(int[] bitmap, IteratingRLW32 i) { int pos = 0; int s; while ((s = i.size()) > 0) { if (pos + s < bitmap.length) { final int L = i.getRunningLength(); if (i.getRunningBit()) java.util.Arrays.fill(bitmap, pos, pos + L, ~0); pos += L; final int LR = i.getNumberOfLiteralWords(); for (int k = 0; k < LR; ++k) bitmap[pos++] |= i.getLiteralWordAt(k); if (!i.next()) { return pos; } } else { int howmany = bitmap.length - pos; int L = i.getRunningLength(); if (pos + L > bitmap.length) { if (i.getRunningBit()) { java.util.Arrays.fill(bitmap, pos, bitmap.length, ~0); } i.discardFirstWords(howmany); return bitmap.length; } if (i.getRunningBit()) java.util.Arrays.fill(bitmap, pos, pos + L, ~0); pos += L; for (int k = 0; pos < bitmap.length; ++k) bitmap[pos++] |= i.getLiteralWordAt(k); i.discardFirstWords(howmany); return pos; } } return pos; } protected static int inplacexor(int[] bitmap, IteratingRLW32 i) { int pos = 0; int s; while ((s = i.size()) > 0) { if (pos + s < bitmap.length) { final int L = i.getRunningLength(); if (i.getRunningBit()) { for(int k = pos ; k < pos + L; ++k) bitmap[k] = ~bitmap[k]; } pos += L; final int LR = 
i.getNumberOfLiteralWords(); for (int k = 0; k < LR; ++k) bitmap[pos++] ^= i.getLiteralWordAt(k); if (!i.next()) { return pos; } } else { int howmany = bitmap.length - pos; int L = i.getRunningLength(); if (pos + L > bitmap.length) { if (i.getRunningBit()) { for(int k = pos ; k < bitmap.length; ++k) bitmap[k] = ~bitmap[k]; } i.discardFirstWords(howmany); return bitmap.length; } if (i.getRunningBit()) for(int k = pos ; k < pos + L; ++k) bitmap[k] = ~bitmap[k]; pos += L; for (int k = 0; pos < bitmap.length; ++k) bitmap[pos++] ^= i.getLiteralWordAt(k); i.discardFirstWords(howmany); return pos; } } return pos; } protected static int inplaceand(int[] bitmap, IteratingRLW32 i) { int pos = 0; int s; while ((s = i.size()) > 0) { if (pos + s < bitmap.length) { final int L = i.getRunningLength(); if (!i.getRunningBit()) { for(int k = pos ; k < pos + L; ++k) bitmap[k] = 0; } pos += L; final int LR = i.getNumberOfLiteralWords(); for (int k = 0; k < LR; ++k) bitmap[pos++] &= i.getLiteralWordAt(k); if (!i.next()) { return pos; } } else { int howmany = bitmap.length - pos; int L = i.getRunningLength(); if (pos + L > bitmap.length) { if (!i.getRunningBit()) { for(int k = pos ; k < bitmap.length; ++k) bitmap[k] = 0; } i.discardFirstWords(howmany); return bitmap.length; } if (!i.getRunningBit()) for(int k = pos ; k < pos + L; ++k) bitmap[k] = 0; pos += L; for (int k = 0; pos < bitmap.length; ++k) bitmap[pos++] &= i.getLiteralWordAt(k); i.discardFirstWords(howmany); return pos; } } return pos; } /** * An optimization option. Larger values may improve speed, but at * the expense of memory. 
*/ public final static int DEFAULTMAXBUFSIZE = 65536; } class ORIt implements CloneableIterator { EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); int[] hardbitmap; LinkedList ll; ORIt(LinkedList basell, final int bufsize) { this.ll = basell; this.hardbitmap = new int[bufsize]; } @Override public XORIt clone() throws CloneNotSupportedException { XORIt answer = (XORIt) super.clone(); answer.buffer = this.buffer.clone(); answer.hardbitmap = this.hardbitmap.clone(); answer.ll = (LinkedList) this.ll.clone(); return answer; } @Override public boolean hasNext() { return !this.ll.isEmpty(); } @Override public EWAHIterator32 next() { this.buffer.clear(); int effective = 0; Iterator i = this.ll.iterator(); while (i.hasNext()) { IteratingRLW32 rlw = i.next(); if (rlw.size() > 0) { int eff = IteratorAggregation32.inplaceor(this.hardbitmap, rlw); if (eff > effective) effective = eff; } else i.remove(); } for (int k = 0; k < effective; ++k) this.buffer.add(this.hardbitmap[k]); Arrays.fill(this.hardbitmap, 0); return this.buffer.getEWAHIterator(); } } class XORIt implements CloneableIterator { EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); int[] hardbitmap; LinkedList ll; XORIt(LinkedList basell, final int bufsize) { this.ll = basell; this.hardbitmap = new int[bufsize]; } @Override public XORIt clone() throws CloneNotSupportedException { XORIt answer = (XORIt) super.clone(); answer.buffer = this.buffer.clone(); answer.hardbitmap = this.hardbitmap.clone(); answer.ll = (LinkedList) this.ll.clone(); return answer; } @Override public boolean hasNext() { return !this.ll.isEmpty(); } @Override public EWAHIterator32 next() { this.buffer.clear(); int effective = 0; Iterator i = this.ll.iterator(); while (i.hasNext()) { IteratingRLW32 rlw = i.next(); if (rlw.size() > 0) { int eff = IteratorAggregation32.inplacexor(this.hardbitmap, rlw); if (eff > effective) effective = eff; } else i.remove(); } for (int k = 0; k < effective; ++k) 
this.buffer.add(this.hardbitmap[k]); Arrays.fill(this.hardbitmap, 0); return this.buffer.getEWAHIterator(); } } class AndIt implements CloneableIterator { EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); LinkedList ll; int buffersize; public AndIt(LinkedList basell, final int bufsize) { this.ll = basell; this.buffersize = bufsize; } @Override public boolean hasNext() { return !this.ll.isEmpty(); } @Override public AndIt clone() throws CloneNotSupportedException { AndIt answer = (AndIt) super.clone(); answer.buffer = this.buffer.clone(); answer.ll = (LinkedList) this.ll.clone(); return answer; } @Override public EWAHIterator32 next() { this.buffer.clear(); IteratorAggregation32.andToContainer(this.buffer, this.buffersize * this.ll.size(), this.ll.get(0), this.ll.get(1)); if (this.ll.size() > 2) { Iterator i = this.ll.iterator(); i.next(); i.next(); EWAHCompressedBitmap32 tmpbuffer = new EWAHCompressedBitmap32(); while (i.hasNext() && this.buffer.sizeInBytes() > 0) { IteratorAggregation32.andToContainer(tmpbuffer, this.buffer.getIteratingRLW(), i.next()); this.buffer.swap(tmpbuffer); tmpbuffer.clear(); } } Iterator i = this.ll.iterator(); while(i.hasNext()) { if(i.next().size() == 0) { this.ll.clear(); break; } } return this.buffer.getEWAHIterator(); } }javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/IteratorUtil32.java000066400000000000000000000067471224043567000275750ustar00rootroot00000000000000package com.googlecode.javaewah32; import java.util.Iterator; import com.googlecode.javaewah.IntIterator; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. 
*/ /** * Convenience functions for working over iterators * */ public class IteratorUtil32 { /** * @param i iterator we wish to iterate over * @return an iterator over the set bits corresponding to the iterator */ public static IntIterator toSetBitsIntIterator(final IteratingRLW32 i) { return new IntIteratorOverIteratingRLW32(i); } /** * @param i iterator we wish to iterate over * @return an iterator over the set bits corresponding to the iterator */ public static Iterator toSetBitsIterator(final IteratingRLW32 i) { return new Iterator() { @Override public boolean hasNext() { return this.under.hasNext(); } @Override public Integer next() { return new Integer(this.under.next()); } @Override public void remove() { } final private IntIterator under = toSetBitsIntIterator(i); }; } /** * Turn an iterator into a bitmap * @param i iterator we wish to materialize * @param c where we write */ public static void materialize(final IteratingRLW32 i, final BitmapStorage32 c) { while (true) { if (i.getRunningLength() > 0) { c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); } for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) c.add(i.getLiteralWordAt(k)); if (!i.next()) break; } } /** * @param i iterator we wish to iterate over * @return the cardinality (number of set bits) corresponding to the iterator */ public static int cardinality(final IteratingRLW32 i) { int answer = 0; while (true) { if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap32.wordinbits; for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) answer += Long.bitCount(i.getLiteralWordAt(k)); if(!i.next()) break; } return answer; } /** * * @param x set of bitmaps we wish to iterate over * @return an array of iterators corresponding to the array of bitmaps */ public static IteratingRLW32[] toIterators(final EWAHCompressedBitmap32... 
x) { IteratingRLW32[] X = new IteratingRLW32[x.length]; for (int k = 0; k < X.length; ++k) { X[k] = new IteratingBufferedRunningLengthWord32(x[k]); } return X; } /** * Turn an iterator into a bitmap * * @param i iterator we wish to materialize * @param c where we write * @param Max maximum number of words to materialize * @return how many words were actually materialized */ public static long materialize(final IteratingRLW32 i, final BitmapStorage32 c, int Max) { final int origMax = Max; while (true) { if (i.getRunningLength() > 0) { int L = i.getRunningLength(); if(L > Max) L = Max; c.addStreamOfEmptyWords(i.getRunningBit(), L); Max -= L; } long L = i.getNumberOfLiteralWords(); for (int k = 0; k < L; ++k) c.add(i.getLiteralWordAt(k)); if(Max>0) { if (!i.next()) break; } else break; } return origMax - Max; } /** * Turn an iterator into a bitmap * * @param i iterator we wish to materialize * @return materialized version of the iterator */ public static EWAHCompressedBitmap32 materialize(final IteratingRLW32 i) { EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); materialize(i, ewah); return ewah; } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/NonEmptyVirtualStorage32.java000066400000000000000000000041551224043567000316020ustar00rootroot00000000000000package com.googlecode.javaewah32; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * This is a BitmapStorage that can be used to determine quickly * if the result of an operation is non-trivial... that is, whether * there will be at least on set bit. 
* * @since 0.5.0 * @author Daniel Lemire and Veronika Zenz * */ public class NonEmptyVirtualStorage32 implements BitmapStorage32 { static class NonEmptyException extends RuntimeException { private static final long serialVersionUID = 1L; /** * Do not fill in the stack trace for this exception * for performance reasons. * * @return this instance * @see java.lang.Throwable#fillInStackTrace() */ @Override public synchronized Throwable fillInStackTrace() { return this; } } private static final NonEmptyException nonEmptyException = new NonEmptyException(); /** * If the word to be added is non-zero, a NonEmptyException exception is thrown. */ @Override public void add(int newdata) { if(newdata!=0) throw nonEmptyException; } /** * throws a NonEmptyException exception when number is greater than 0 * */ @Override public void addStreamOfLiteralWords(int[] data, int start, int number) { if (number > 0){ throw nonEmptyException; } } /** * If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception, * otherwise, nothing happens. * */ @Override public void addStreamOfEmptyWords(boolean v, int number) { if(v && (number>0)) throw nonEmptyException; } /** * throws a NonEmptyException exception when number is greater than 0 * */ @Override public void addStreamOfNegatedLiteralWords(int[] data, int start, int number) { if (number > 0){ throw nonEmptyException; } } /** * Does nothing. * * @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int) */ @Override public void setSizeInBits(int bits) { } } javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/RunningLengthWord32.java000066400000000000000000000114421224043567000305500ustar00rootroot00000000000000package com.googlecode.javaewah32; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Mostly for internal use. 
* * @since 0.5.0 * @author Daniel Lemire */ public final class RunningLengthWord32 implements Cloneable { /** * Instantiates a new running length word. * * @param a * an array of 32-bit words * @param p * position in the array where the running length word is * located. */ RunningLengthWord32(final EWAHCompressedBitmap32 a, final int p) { this.parent = a; this.position = p; } /** * Gets the number of literal words. * * @return the number of literal words */ public int getNumberOfLiteralWords() { return (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); } /** * Gets the running bit. * * @return the running bit */ public boolean getRunningBit() { return (this.parent.buffer[this.position] & 1) != 0; } /** * Gets the running length. * * @return the running length */ public int getRunningLength() { return (this.parent.buffer[this.position] >>> 1) & largestrunninglengthcount; } /** * Sets the number of literal words. * * @param number * the new number of literal words */ public void setNumberOfLiteralWords(final int number) { this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) | runninglengthplusrunningbit; } /** * Sets the running bit. * * @param b * the new running bit */ public void setRunningBit(final boolean b) { if (b) this.parent.buffer[this.position] |= 1; else this.parent.buffer[this.position] &= ~1; } /** * Sets the running length. * * @param number * the new running length */ public void setRunningLength(final int number) { this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; this.parent.buffer[this.position] &= (number << 1) | notshiftedlargestrunninglengthcount; } /** * Return the size in uncompressed words represented by this running * length word. 
* * @return the int */ public int size() { return getRunningLength() + getNumberOfLiteralWords(); } /* * @see java.lang.Object#toString() */ @Override public String toString() { return "running bit = " + getRunningBit() + " running length = " + getRunningLength() + " number of lit. words " + getNumberOfLiteralWords(); } @Override public RunningLengthWord32 clone() throws CloneNotSupportedException { RunningLengthWord32 answer; answer = (RunningLengthWord32) super.clone(); answer.parent = this.parent; answer.position = this.position; return answer; } /** The array of words. */ public EWAHCompressedBitmap32 parent; /** The position in array. */ public int position; /** * number of bits dedicated to marking of the running length of clean * words */ public static final int runninglengthbits = 16; private static final int literalbits = 32 - 1 - runninglengthbits; /** largest number of literal words in a run. */ public static final int largestliteralcount = (1 << literalbits) - 1; /** largest number of clean words in a run */ public static final int largestrunninglengthcount = (1 << runninglengthbits) - 1; private static final int runninglengthplusrunningbit = (1 << (runninglengthbits + 1)) - 1; private static final int shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; private static final int notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; private static final int notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; 
}javaewah-JavaEWAH-0.7.9/src/test/000077500000000000000000000000001224043567000164125ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/test/java/000077500000000000000000000000001224043567000173335ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/test/java/com/000077500000000000000000000000001224043567000201115ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/test/java/com/googlecode/000077500000000000000000000000001224043567000222205ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/test/java/com/googlecode/javaewah/000077500000000000000000000000001224043567000240065ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/test/java/com/googlecode/javaewah/EWAHCompressedBitmapTest.java000066400000000000000000001276001224043567000314250ustar00rootroot00000000000000package com.googlecode.javaewah; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ import org.junit.Test; import java.util.*; import java.io.*; import junit.framework.Assert; /** * This class is used for basic unit testing. 
*/ @SuppressWarnings("javadoc") public class EWAHCompressedBitmapTest { @Test public void testGet() { for (int gap = 29; gap < 10000; gap *= 10) { EWAHCompressedBitmap x = new EWAHCompressedBitmap(); for (int k = 0; k < 100; ++k) x.set(k * gap); for (int k = 0; k < 100 * gap; ++k) if (x.get(k)) { if (k % gap != 0) throw new RuntimeException( "spotted an extra set bit at " + k + " gap = " + gap); } else if (k % gap == 0) throw new RuntimeException( "missed a set bit " + k + " gap = " + gap); } } @SuppressWarnings({ "deprecation", "boxing" }) @Test public void OKaserBugReportJuly2013() { System.out.println("testing OKaserBugReportJuly2013"); int[][] data = { {}, { 5, 6, 7, 8, 9 }, { 1 }, { 2 }, { 2, 5, 7 }, { 1 }, { 2 }, { 1, 6, 9 }, { 1, 3, 4, 6, 8, 9 }, { 1, 3, 4, 6, 8, 9 }, { 1, 3, 6, 8, 9 }, { 2, 5, 7 }, { 2, 5, 7 }, { 1, 3, 9 }, { 3, 8, 9 } }; EWAHCompressedBitmap[] toBeOred = new EWAHCompressedBitmap[data.length]; Set bruteForceAnswer = new HashSet(); for (int i = 0; i < toBeOred.length; ++i) { toBeOred[i] = new EWAHCompressedBitmap(); for (int j : data[i]) { toBeOred[i].set(j); bruteForceAnswer.add(j); } toBeOred[i].setSizeInBits(1000,false); } long rightcard = bruteForceAnswer.size(); EWAHCompressedBitmap e1 = FastAggregation.or(toBeOred); Assert.assertEquals(rightcard, e1.cardinality()); EWAHCompressedBitmap e2 = FastAggregation.bufferedor(65536, toBeOred); Assert.assertEquals(rightcard, e2.cardinality()); EWAHCompressedBitmap foo = new EWAHCompressedBitmap(); FastAggregation.legacy_orWithContainer(foo, toBeOred); Assert.assertEquals(rightcard, foo.cardinality()); } @Test public void testSizeInBitsWithAnd() { System.out.println("testing SizeInBitsWithAnd"); EWAHCompressedBitmap a = new EWAHCompressedBitmap(); EWAHCompressedBitmap b = new EWAHCompressedBitmap(); a.set(1); a.set(2); a.set(3); b.set(3); b.set(4); b.set(5); a.setSizeInBits(10,false); b.setSizeInBits(10,false); EWAHCompressedBitmap and = a.and(b); Assert.assertEquals(10, and.sizeInBits()); 
EWAHCompressedBitmap and2 = EWAHCompressedBitmap.and(a,b); Assert.assertEquals(10, and2.sizeInBits()); } @Test public void testSizeInBitsWithAndNot() { System.out.println("testing SizeInBitsWithAndNot"); EWAHCompressedBitmap a = new EWAHCompressedBitmap(); EWAHCompressedBitmap b = new EWAHCompressedBitmap(); a.set(1); a.set(2); a.set(3); b.set(3); b.set(4); b.set(5); a.setSizeInBits(10,false); b.setSizeInBits(10,false); EWAHCompressedBitmap and = a.andNot(b); Assert.assertEquals(10, and.sizeInBits()); } @Test public void testSizeInBitsWithOr() { System.out.println("testing SizeInBitsWithOr"); EWAHCompressedBitmap a = new EWAHCompressedBitmap(); EWAHCompressedBitmap b = new EWAHCompressedBitmap(); a.set(1); a.set(2); a.set(3); b.set(3); b.set(4); b.set(5); a.setSizeInBits(10,false); b.setSizeInBits(10,false); EWAHCompressedBitmap or = a.or(b); Assert.assertEquals(10, or.sizeInBits()); EWAHCompressedBitmap or2 = EWAHCompressedBitmap.or(a,b); Assert.assertEquals(10, or2.sizeInBits()); } @Test public void testSizeInBitsWithXor() { System.out.println("testing SizeInBitsWithXor"); EWAHCompressedBitmap a = new EWAHCompressedBitmap(); EWAHCompressedBitmap b = new EWAHCompressedBitmap(); a.set(1); a.set(2); a.set(3); b.set(3); b.set(4); b.set(5); a.setSizeInBits(10,false); b.setSizeInBits(10,false); EWAHCompressedBitmap xor = a.xor(b); Assert.assertEquals(10, xor.sizeInBits()); EWAHCompressedBitmap xor2 = EWAHCompressedBitmap.xor(a,b); Assert.assertEquals(10, xor2.sizeInBits()); } @Test public void testDebugSetSizeInBitsTest() { System.out.println("testing DebugSetSizeInBits"); EWAHCompressedBitmap b = new EWAHCompressedBitmap(); b.set(4); b.setSizeInBits(6, true); List positions = b.getPositions(); Assert.assertEquals(2, positions.size()); Assert.assertEquals(Integer.valueOf(4), positions.get(0)); Assert.assertEquals(Integer.valueOf(5), positions.get(1)); Iterator iterator = b.iterator(); Assert.assertTrue(iterator.hasNext()); Assert.assertEquals(Integer.valueOf(4), 
iterator.next()); Assert.assertTrue(iterator.hasNext()); Assert.assertEquals(Integer.valueOf(5), iterator.next()); Assert.assertFalse(iterator.hasNext()); IntIterator intIterator = b.intIterator(); Assert.assertTrue(intIterator.hasNext()); Assert.assertEquals(4, intIterator.next()); Assert.assertTrue(intIterator.hasNext()); Assert.assertEquals(5, intIterator.next()); Assert.assertFalse(intIterator.hasNext()); } /** * Created: 2/4/11 6:03 PM By: Arnon Moscona. */ @Test public void EwahIteratorProblem() { System.out.println("testing ArnonMoscona"); EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); for (int i = 9434560; i <= 9435159; i++) { bitmap.set(i); } IntIterator iterator = bitmap.intIterator(); List v = bitmap.getPositions(); int[] array = bitmap.toArray(); for (int k = 0; k < v.size(); ++k) { Assert.assertTrue(array[k] == v.get(k).intValue()); Assert.assertTrue(iterator.hasNext()); final int ival = iterator.next(); final int vval = v.get(k).intValue(); Assert.assertTrue(ival == vval); } Assert.assertTrue(!iterator.hasNext()); // for (int k = 2; k <= 1024; k *= 2) { int[] bitsToSet = createSortedIntArrayOfBitsToSet(k, 434455 + 5 * k); EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); for (int i : bitsToSet) { ewah.set(i); } equal(ewah.iterator(), bitsToSet); } } /** * Test submitted by Gregory Ssi-Yan-Kai */ @Test public void SsiYanKaiTest() { System.out.println("testing SsiYanKaiTest"); EWAHCompressedBitmap a = EWAHCompressedBitmap.bitmapOf(39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, 40000, 40001, 40002, 40003, 40004, 40005, 40006, 40007, 
40008, 40009, 40010, 40011, 40012, 40013, 40014, 40015, 40016, 40017, 40018, 40019, 40020, 40021, 40022, 40023, 40024, 40025, 40026, 40027, 40028, 40029, 40030, 40031, 40032, 40033, 40034, 40035, 40036, 40037, 40038, 40039, 40040, 40041, 40042, 40043, 40044, 40045, 40046, 40047, 40048, 40049, 40050, 40051, 40052, 40053, 40054, 40055, 40056, 40057, 40058, 40059, 40060, 40061, 40062, 40063, 40064, 40065, 40066, 40067, 40068, 40069, 40070, 40071, 40072, 40073, 40074, 40075, 40076, 40077, 40078, 40079, 40080, 40081, 40082, 40083, 40084, 40085, 40086, 40087, 40088, 40089, 40090, 40091, 40092, 40093, 40094, 40095, 40096, 40097, 40098, 40099, 40100); EWAHCompressedBitmap b = EWAHCompressedBitmap.bitmapOf(39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, 270000); LinkedHashSet aPositions = new LinkedHashSet( a.getPositions()); int intersection = 0; EWAHCompressedBitmap inter = new EWAHCompressedBitmap(); LinkedHashSet bPositions = new LinkedHashSet( b.getPositions()); for (Integer integer : bPositions) { if (aPositions.contains(integer)) { inter.set(integer.intValue()); ++intersection; } } EWAHCompressedBitmap and2 = a.and(b); if (!and2.equals(inter)) throw new RuntimeException("intersections don't match"); if (intersection != and2.cardinality()) throw new RuntimeException("cardinalities don't match"); } /** * Test inspired by William Habermaas. 
*/ @Test public void habermaasTest() { System.out.println("testing habermaasTest"); BitSet bitsetaa = new BitSet(); EWAHCompressedBitmap aa = new EWAHCompressedBitmap(); int[] val = { 55400, 1000000, 1000128 }; for (int k = 0; k < val.length; ++k) { aa.set(val[k]); bitsetaa.set(val[k]); } equal(aa, bitsetaa); BitSet bitsetab = new BitSet(); EWAHCompressedBitmap ab = new EWAHCompressedBitmap(); for (int i = 4096; i < (4096 + 5); i++) { ab.set(i); bitsetab.set(i); } ab.set(99000); bitsetab.set(99000); ab.set(1000130); bitsetab.set(1000130); equal(ab, bitsetab); EWAHCompressedBitmap bb = aa.or(ab); EWAHCompressedBitmap bbAnd = aa.and(ab); try { EWAHCompressedBitmap abnot = ab.clone(); abnot.not(); EWAHCompressedBitmap bbAnd2 = aa.andNot(abnot); assertEquals(bbAnd2, bbAnd); } catch (CloneNotSupportedException e) { e.printStackTrace(); } BitSet bitsetbb = (BitSet) bitsetaa.clone(); bitsetbb.or(bitsetab); BitSet bitsetbbAnd = (BitSet) bitsetaa.clone(); bitsetbbAnd.and(bitsetab); equal(bbAnd, bitsetbbAnd); equal(bb, bitsetbb); } @Test public void testAndResultAppend() { System.out.println("testing AndResultAppend"); EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); bitmap1.set(35); EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); bitmap2.set(35); bitmap2.set(130); EWAHCompressedBitmap resultBitmap = bitmap1.and(bitmap2); resultBitmap.set(131); bitmap1.set(131); assertEquals(bitmap1, resultBitmap); } /** * Test cardinality. 
*/ @Test public void testCardinality() { System.out.println("testing EWAH cardinality"); EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); bitmap.set(Integer.MAX_VALUE - 64); // System.out.format("Total Items %d\n", bitmap.cardinality()); Assert.assertTrue(bitmap.cardinality() == 1); } /** * Test clear function */ @Test public void testClear() { System.out.println("testing Clear"); EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); bitmap.set(5); bitmap.clear(); bitmap.set(7); Assert.assertTrue(1 == bitmap.cardinality()); Assert.assertTrue(1 == bitmap.getPositions().size()); Assert.assertTrue(1 == bitmap.toArray().length); Assert.assertTrue(7 == bitmap.getPositions().get(0).intValue()); Assert.assertTrue(7 == bitmap.toArray()[0]); bitmap.clear(); bitmap.set(5000); Assert.assertTrue(1 == bitmap.cardinality()); Assert.assertTrue(1 == bitmap.getPositions().size()); Assert.assertTrue(1 == bitmap.toArray().length); Assert.assertTrue(5000 == bitmap.getPositions().get(0).intValue()); bitmap.set(5001); bitmap.set(5005); bitmap.set(5100); bitmap.set(5500); bitmap.clear(); bitmap.set(5); bitmap.set(7); bitmap.set(1000); bitmap.set(1001); Assert.assertTrue(4 == bitmap.cardinality()); List positions = bitmap.getPositions(); Assert.assertTrue(4 == positions.size()); Assert.assertTrue(5 == positions.get(0).intValue()); Assert.assertTrue(7 == positions.get(1).intValue()); Assert.assertTrue(1000 == positions.get(2).intValue()); Assert.assertTrue(1001 == positions.get(3).intValue()); } /** * Test ewah compressed bitmap. 
*/ @Test public void testEWAHCompressedBitmap() { System.out.println("testing EWAH"); long zero = 0; long specialval = 1l | (1l << 4) | (1l << 63); long notzero = ~zero; EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); myarray1.add(zero); myarray1.add(zero); myarray1.add(zero); myarray1.add(specialval); myarray1.add(specialval); myarray1.add(notzero); myarray1.add(zero); Assert.assertEquals(myarray1.getPositions().size(), 6 + 64); EWAHCompressedBitmap myarray2 = new EWAHCompressedBitmap(); myarray2.add(zero); myarray2.add(specialval); myarray2.add(specialval); myarray2.add(notzero); myarray2.add(zero); myarray2.add(zero); myarray2.add(zero); Assert.assertEquals(myarray2.getPositions().size(), 6 + 64); List data1 = myarray1.getPositions(); List data2 = myarray2.getPositions(); Vector logicalor = new Vector(); { HashSet tmp = new HashSet(); tmp.addAll(data1); tmp.addAll(data2); logicalor.addAll(tmp); } Collections.sort(logicalor); Vector logicaland = new Vector(); logicaland.addAll(data1); logicaland.retainAll(data2); Collections.sort(logicaland); EWAHCompressedBitmap arrayand = myarray1.and(myarray2); Assert.assertTrue(arrayand.getPositions().equals(logicaland)); EWAHCompressedBitmap arrayor = myarray1.or(myarray2); Assert.assertTrue(arrayor.getPositions().equals(logicalor)); EWAHCompressedBitmap arrayandbis = myarray2.and(myarray1); Assert.assertTrue(arrayandbis.getPositions().equals(logicaland)); EWAHCompressedBitmap arrayorbis = myarray2.or(myarray1); Assert.assertTrue(arrayorbis.getPositions().equals(logicalor)); EWAHCompressedBitmap x = new EWAHCompressedBitmap(); for (Integer i : myarray1.getPositions()) { x.set(i.intValue()); } Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); x = new EWAHCompressedBitmap(); for (Integer i : myarray2.getPositions()) { x.set(i.intValue()); } Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); x = new EWAHCompressedBitmap(); for (Iterator k = myarray1.iterator(); k.hasNext();) { 
x.set(extracted(k).intValue()); } Assert.assertTrue(x.getPositions().equals(myarray1.getPositions())); x = new EWAHCompressedBitmap(); for (Iterator k = myarray2.iterator(); k.hasNext();) { x.set(extracted(k).intValue()); } Assert.assertTrue(x.getPositions().equals(myarray2.getPositions())); } /** * Test externalization. * * @throws IOException * Signals that an I/O exception has occurred. */ @Test public void testExternalization() throws IOException { System.out.println("testing EWAH externalization"); EWAHCompressedBitmap ewcb = new EWAHCompressedBitmap(); int[] val = { 5, 4400, 44600, 55400, 1000000 }; for (int k = 0; k < val.length; ++k) { ewcb.set(val[k]); } ByteArrayOutputStream bos = new ByteArrayOutputStream(); ObjectOutputStream oo = new ObjectOutputStream(bos); ewcb.writeExternal(oo); oo.close(); ewcb = null; ewcb = new EWAHCompressedBitmap(); ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); ewcb.readExternal(new ObjectInputStream(bis)); List result = ewcb.getPositions(); Assert.assertTrue(val.length == result.size()); for (int k = 0; k < val.length; ++k) { Assert.assertTrue(result.get(k).intValue() == val[k]); } } @Test public void testExtremeRange() { System.out.println("testing EWAH at its extreme range"); int N = 1024; EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); for (int i = 0; i < N; ++i) { myarray1.set(Integer.MAX_VALUE - 64 - N + i); Assert.assertTrue(myarray1.cardinality() == i + 1); int[] val = myarray1.toArray(); Assert.assertTrue(val[0] == Integer.MAX_VALUE - 64 - N); } } /** * Test the intersects method */ @Test public void testIntersectsMethod() { System.out.println("testing Intersets Bug"); EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); bitmap.set(1); EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); bitmap2.set(1); bitmap2.set(11); bitmap2.set(111); bitmap2.set(1111111); bitmap2.set(11111111); Assert.assertTrue(bitmap.intersects(bitmap2)); 
Assert.assertTrue(bitmap2.intersects(bitmap)); EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); bitmap3.set(101); EWAHCompressedBitmap bitmap4 = new EWAHCompressedBitmap(); for (int i = 0; i < 100; i++) { bitmap4.set(i); } Assert.assertFalse(bitmap3.intersects(bitmap4)); Assert.assertFalse(bitmap4.intersects(bitmap3)); EWAHCompressedBitmap bitmap5 = new EWAHCompressedBitmap(); bitmap5.set(0); bitmap5.set(10); bitmap5.set(20); EWAHCompressedBitmap bitmap6 = new EWAHCompressedBitmap(); bitmap6.set(1); bitmap6.set(11); bitmap6.set(21); bitmap6.set(1111111); bitmap6.set(11111111); Assert.assertFalse(bitmap5.intersects(bitmap6)); Assert.assertFalse(bitmap6.intersects(bitmap5)); bitmap5.set(21); Assert.assertTrue(bitmap5.intersects(bitmap6)); Assert.assertTrue(bitmap6.intersects(bitmap5)); EWAHCompressedBitmap bitmap7 = new EWAHCompressedBitmap(); bitmap7.set(1); bitmap7.set(10); bitmap7.set(20); bitmap7.set(1111111); bitmap7.set(11111111); EWAHCompressedBitmap bitmap8 = new EWAHCompressedBitmap(); for (int i = 0; i < 1000; i++) { if (i != 1 && i != 10 && i != 20) { bitmap8.set(i); } } Assert.assertFalse(bitmap7.intersects(bitmap8)); Assert.assertFalse(bitmap8.intersects(bitmap7)); } /** * as per renaud.delbru, Feb 12, 2009 this might throw an error out of bound * exception. */ @Test public void testLargeEWAHCompressedBitmap() { System.out.println("testing EWAH over a large array"); EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); int N = 11000000; for (int i = 0; i < N; ++i) { myarray1.set(i); } Assert.assertTrue(myarray1.sizeInBits() == N); } /** * Test massive and. 
*/ @Test public void testMassiveAnd() { System.out.println("testing massive logical and"); EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[1024]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap(); for (int k = 0; k < 30000; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); } EWAHCompressedBitmap answer = ewah[0]; for (int k = 1; k < ewah.length; ++k) answer = answer.and(ewah[k]); // result should be empty if (answer.getPositions().size() != 0) System.out.println(answer.toDebugString()); Assert.assertTrue(answer.getPositions().size() == 0); Assert.assertTrue(EWAHCompressedBitmap.and(ewah).getPositions().size() == 0); } /** * Test massive and not. */ @Test public void testMassiveAndNot() { System.out.println("testing massive and not"); final int N = 1024; EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap(); for (int k = 0; k < 30000; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); } EWAHCompressedBitmap answer = ewah[0]; EWAHCompressedBitmap answer2 = ewah[0]; for (int k = 1; k < ewah.length; ++k) { answer = answer.andNot(ewah[k]); EWAHCompressedBitmap copy = null; try { copy = ewah[k].clone(); copy.not(); answer2.and(copy); assertEqualsPositions(answer, answer2); } catch (CloneNotSupportedException e) { e.printStackTrace(); } } } /** * Test massive or. 
*/ @Test public void testMassiveOr() { System.out .println("testing massive logical or (can take a couple of minutes)"); final int N = 128; for (int howmany = 512; howmany <= 10000; howmany *= 2) { EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; BitSet[] bset = new BitSet[N]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap(); for (int k = 0; k < bset.length; ++k) bset[k] = new BitSet(); for (int k = 0; k < N; ++k) assertEqualsPositions(bset[k], ewah[k]); for (int k = 0; k < howmany; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); bset[(k + 2 * k * k) % ewah.length].set(k); } for (int k = 0; k < N; ++k) assertEqualsPositions(bset[k], ewah[k]); EWAHCompressedBitmap answer = ewah[0]; BitSet bitsetanswer = bset[0]; for (int k = 1; k < ewah.length; ++k) { EWAHCompressedBitmap tmp = answer.or(ewah[k]); bitsetanswer.or(bset[k]); answer = tmp; assertEqualsPositions(bitsetanswer, answer); } assertEqualsPositions(bitsetanswer, answer); assertEqualsPositions(bitsetanswer, EWAHCompressedBitmap.or(ewah)); int k = 0; for (int j : answer) { if (k != j) System.out.println(answer.toDebugString()); Assert.assertEquals(k, j); k += 1; } } } @Test public void testsetSizeInBits() { System.out.println("testing setSizeInBits"); for (int k = 0; k < 4096; ++k) { EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); ewah.setSizeInBits(k,false); Assert.assertEquals(ewah.sizeinbits, k); Assert.assertEquals(ewah.cardinality(), 0); EWAHCompressedBitmap ewah2 = new EWAHCompressedBitmap(); ewah2.setSizeInBits(k, false); Assert.assertEquals(ewah2.sizeinbits, k); Assert.assertEquals(ewah2.cardinality(), 0); EWAHCompressedBitmap ewah3 = new EWAHCompressedBitmap(); for (int i = 0; i < k; ++i) { ewah3.set(i); } Assert.assertEquals(ewah3.sizeinbits, k); Assert.assertEquals(ewah3.cardinality(), k); EWAHCompressedBitmap ewah4 = new EWAHCompressedBitmap(); ewah4.setSizeInBits(k, true); Assert.assertEquals(ewah4.sizeinbits, k); 
Assert.assertEquals(ewah4.cardinality(), k); } } /** * Test massive xor. */ @Test public void testMassiveXOR() { System.out .println("testing massive xor (can take a couple of minutes)"); final int N = 16; EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; BitSet[] bset = new BitSet[N]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap(); for (int k = 0; k < bset.length; ++k) bset[k] = new BitSet(); for (int k = 0; k < 30000; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); bset[(k + 2 * k * k) % ewah.length].set(k); } EWAHCompressedBitmap answer = ewah[0]; BitSet bitsetanswer = bset[0]; for (int k = 1; k < ewah.length; ++k) { answer = answer.xor(ewah[k]); bitsetanswer.xor(bset[k]); assertEqualsPositions(bitsetanswer, answer); } int k = 0; for (int j : answer) { if (k != j) System.out.println(answer.toDebugString()); Assert.assertEquals(k, j); k += 1; } } @Test public void testMultiAnd() { System.out.println("testing MultiAnd"); // test bitmap3 has a literal word while bitmap1/2 have a run of 1 EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); bitmap1.addStreamOfEmptyWords(true, 1000); EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); bitmap2.addStreamOfEmptyWords(true, 2000); EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); bitmap3.set(500); bitmap3.set(502); bitmap3.set(504); assertAndEquals(bitmap1, bitmap2, bitmap3); // equal bitmap1 = new EWAHCompressedBitmap(); bitmap1.set(35); bitmap2 = new EWAHCompressedBitmap(); bitmap2.set(35); bitmap3 = new EWAHCompressedBitmap(); bitmap3.set(35); assertAndEquals(bitmap1, bitmap2, bitmap3); // same number of words for each bitmap3.set(63); assertAndEquals(bitmap1, bitmap2, bitmap3); // one word bigger bitmap3.set(64); assertAndEquals(bitmap1, bitmap2, bitmap3); // two words bigger bitmap3.set(130); assertAndEquals(bitmap1, bitmap2, bitmap3); // test that result can still be appended to EWAHCompressedBitmap resultBitmap = EWAHCompressedBitmap.and(bitmap1, 
bitmap2, bitmap3); resultBitmap.set(131); bitmap1.set(131); assertEquals(bitmap1, resultBitmap); final int N = 128; for (int howmany = 512; howmany <= 10000; howmany *= 2) { EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap(); for (int k = 0; k < howmany; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); } for (int k = 1; k <= ewah.length; ++k) { EWAHCompressedBitmap[] shortewah = new EWAHCompressedBitmap[k]; for (int i = 0; i < k; ++i) shortewah[i] = ewah[i]; assertAndEquals(shortewah); } } } @Test public void testMultiOr() { System.out.println("testing MultiOr"); // test bitmap3 has a literal word while bitmap1/2 have a run of 0 EWAHCompressedBitmap bitmap1 = new EWAHCompressedBitmap(); bitmap1.set(1000); EWAHCompressedBitmap bitmap2 = new EWAHCompressedBitmap(); bitmap2.set(2000); EWAHCompressedBitmap bitmap3 = new EWAHCompressedBitmap(); bitmap3.set(500); bitmap3.set(502); bitmap3.set(504); EWAHCompressedBitmap expected = bitmap1.or(bitmap2).or(bitmap3); assertEquals(expected, EWAHCompressedBitmap.or(bitmap1, bitmap2, bitmap3)); final int N = 128; for (int howmany = 512; howmany <= 10000; howmany *= 2) { EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap(); for (int k = 0; k < howmany; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); } for (int k = 1; k <= ewah.length; ++k) { EWAHCompressedBitmap[] shortewah = new EWAHCompressedBitmap[k]; for (int i = 0; i < k; ++i) shortewah[i] = ewah[i]; assertOrEquals(shortewah); } } } /** * Test not. 
(Based on an idea by Ciaran Jessup)
     */
    @Test
    public void testNot() {
        System.out.println("testing not");
        EWAHCompressedBitmap ewah = new EWAHCompressedBitmap();
        for (int i = 0; i <= 184; ++i) {
            ewah.set(i);
        }
        Assert.assertEquals(185, ewah.cardinality());
        ewah.not();
        Assert.assertEquals(0, ewah.cardinality());
    }

    /**
     * Checks cardinality(), or() with itself and orCardinality() with an
     * empty bitmap for bitmaps of increasing size.
     */
    @Test
    public void testOrCardinality() {
        System.out.println("testing Or Cardinality");
        for (int N = 0; N < 1024; ++N) {
            EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap();
            for (int i = 0; i < N; i++) {
                bitmap.set(i);
            }
            bitmap.set(1025);
            bitmap.set(1026);
            Assert.assertEquals(N + 2, bitmap.cardinality());
            EWAHCompressedBitmap orbitmap = bitmap.or(bitmap);
            assertEquals(orbitmap, bitmap);
            Assert.assertEquals(N + 2, orbitmap.cardinality());

            Assert.assertEquals(N + 2,
                    bitmap.orCardinality(new EWAHCompressedBitmap()));
        }
    }

    /**
     * Test sets and gets.
     */
    @Test
    public void testSetGet() {
        System.out.println("testing EWAH set/get");
        EWAHCompressedBitmap ewcb = new EWAHCompressedBitmap();
        int[] val = { 5, 4400, 44600, 55400, 1000000 };
        for (int k = 0; k < val.length; ++k) {
            ewcb.set(val[k]);
        }
        // typed list: the elements are unboxed below
        List<Integer> result = ewcb.getPositions();
        Assert.assertTrue(val.length == result.size());
        for (int k = 0; k < val.length; ++k) {
            Assert.assertEquals(val[k], result.get(k).intValue());
        }
    }

    /**
     * A bitmap obtained through "and" must be equal to, and hash like, a
     * bitmap built directly with the same bit.
     */
    @Test
    public void testHashCode() {
        System.out.println("testing hashCode");
        EWAHCompressedBitmap ewcb = EWAHCompressedBitmap.bitmapOf(50, 70).and(
                EWAHCompressedBitmap.bitmapOf(50, 1000));
        Assert.assertEquals(EWAHCompressedBitmap.bitmapOf(50), ewcb);
        Assert.assertEquals(EWAHCompressedBitmap.bitmapOf(50).hashCode(),
                ewcb.hashCode());
    }

    /**
     * Runs the parameterized size check on several size pairs, then checks
     * that a bit can still be set after the size was extended.
     */
    @Test
    public void testSetSizeInBits() {
        System.out.println("testing SetSizeInBits");
        testSetSizeInBits(130, 131);
        testSetSizeInBits(63, 64);
        testSetSizeInBits(64, 65);
        testSetSizeInBits(64, 128);
        testSetSizeInBits(35, 131);
        testSetSizeInBits(130, 400);
        testSetSizeInBits(130, 191);
        testSetSizeInBits(130, 192);
        EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap();
        bitmap.set(31);
        bitmap.setSizeInBits(130, false);
        bitmap.set(131);
        BitSet jdkBitmap = new BitSet();
        jdkBitmap.set(31);
        jdkBitmap.set(131);
        assertEquals(jdkBitmap, bitmap);
    }

    /**
     * Test with parameters.
     *
     * @throws IOException
     *             Signals that an I/O exception has occurred.
     */
    @Test
    public void testWithParameters() throws IOException {
        System.out
                .println("These tests can run for several minutes. Please be patient.");
        for (int k = 2; k < 1 << 24; k *= 8) {
            shouldSetBits(k);
        }
        PolizziTest(64);
        PolizziTest(128);
        PolizziTest(256);
        PolizziTest(2048);
        System.out.println("Your code is probably ok.");
    }

    /**
     * Pseudo-non-deterministic test inspired by S.J.vanSchaik. (Yes,
     * non-deterministic tests are bad, but the test is actually deterministic.)
     */
    @Test
    public void vanSchaikTest() {
        System.out.println("testing vanSchaikTest (this takes some time)");
        final int totalNumBits = 32768;
        final double odds = 0.9;
        // fixed seed: every run draws the same sequence
        Random rand = new Random(323232323);
        for (int t = 0; t < 100; t++) {
            int numBitsSet = 0;
            EWAHCompressedBitmap cBitMap = new EWAHCompressedBitmap();
            for (int i = 0; i < totalNumBits; i++) {
                if (rand.nextDouble() < odds) {
                    cBitMap.set(i);
                    numBitsSet++;
                }
            }
            Assert.assertEquals(numBitsSet, cBitMap.cardinality());
        }
    }

    /**
     * Function used in a test inspired by Federico Fissore.
* * @param size * the number of set bits * @param seed * the random seed * @return the pseudo-random array int[] */ public static int[] createSortedIntArrayOfBitsToSet(int size, int seed) { Random random = new Random(seed); // build raw int array int[] bits = new int[size]; for (int i = 0; i < bits.length; i++) { bits[i] = random.nextInt(TEST_BS_SIZE); } // might generate duplicates Arrays.sort(bits); // first count how many distinct values int counter = 0; int oldx = -1; for (int x : bits) { if (x != oldx) ++counter; oldx = x; } // then construct new array int[] answer = new int[counter]; counter = 0; oldx = -1; for (int x : bits) { if (x != oldx) { answer[counter] = x; ++counter; } oldx = x; } return answer; } /** * Test inspired by Bilal Tayara */ @Test public void TayaraTest() { System.out.println("Tayara test"); for (int offset = 64; offset < (1 << 30); offset *= 2) { EWAHCompressedBitmap a = new EWAHCompressedBitmap(); EWAHCompressedBitmap b = new EWAHCompressedBitmap(); for (int k = 0; k < 64; ++k) { a.set(offset + k); b.set(offset + k); } if (!a.and(b).equals(a)) throw new RuntimeException("bug"); if (!a.or(b).equals(a)) throw new RuntimeException("bug"); } } @Test public void TestCloneEwahCompressedBitArray() throws CloneNotSupportedException { System.out.println("testing EWAH clone"); EWAHCompressedBitmap a = new EWAHCompressedBitmap(); a.set(410018); a.set(410019); a.set(410020); a.set(410021); a.set(410022); a.set(410023); EWAHCompressedBitmap b; b = a.clone(); a.setSizeInBits(487123, false); b.setSizeInBits(487123, false); Assert.assertTrue(a.equals(b)); } /** * a non-deterministic test proposed by Marc Polizzi. 
* * @param maxlength * the maximum uncompressed size of the bitmap */ public static void PolizziTest(int maxlength) { System.out.println("Polizzi test with max length = " + maxlength); for (int k = 0; k < 10000; ++k) { final Random rnd = new Random(); final EWAHCompressedBitmap ewahBitmap1 = new EWAHCompressedBitmap(); final BitSet jdkBitmap1 = new BitSet(); final EWAHCompressedBitmap ewahBitmap2 = new EWAHCompressedBitmap(); final BitSet jdkBitmap2 = new BitSet(); final EWAHCompressedBitmap ewahBitmap3 = new EWAHCompressedBitmap(); final BitSet jdkBitmap3 = new BitSet(); final int len = rnd.nextInt(maxlength); for (int pos = 0; pos < len; pos++) { // random *** number of bits // set *** if (rnd.nextInt(7) == 0) { // random *** increasing *** values ewahBitmap1.set(pos); jdkBitmap1.set(pos); } if (rnd.nextInt(11) == 0) { // random *** increasing *** values ewahBitmap2.set(pos); jdkBitmap2.set(pos); } if (rnd.nextInt(7) == 0) { // random *** increasing *** values ewahBitmap3.set(pos); jdkBitmap3.set(pos); } } assertEquals(jdkBitmap1, ewahBitmap1); assertEquals(jdkBitmap2, ewahBitmap2); assertEquals(jdkBitmap3, ewahBitmap3); // XOR { final EWAHCompressedBitmap xorEwahBitmap = ewahBitmap1 .xor(ewahBitmap2); final BitSet xorJdkBitmap = (BitSet) jdkBitmap1.clone(); xorJdkBitmap.xor(jdkBitmap2); assertEquals(xorJdkBitmap, xorEwahBitmap); } // AND { final EWAHCompressedBitmap andEwahBitmap = ewahBitmap1 .and(ewahBitmap2); final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); andJdkBitmap.and(jdkBitmap2); assertEquals(andJdkBitmap, andEwahBitmap); } // AND { final EWAHCompressedBitmap andEwahBitmap = ewahBitmap2 .and(ewahBitmap1); final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); andJdkBitmap.and(jdkBitmap2); assertEquals(andJdkBitmap, andEwahBitmap); assertEquals(andJdkBitmap, EWAHCompressedBitmap.and(ewahBitmap1, ewahBitmap2)); } // MULTI AND { final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); andJdkBitmap.and(jdkBitmap2); andJdkBitmap.and(jdkBitmap3); 
assertEquals(andJdkBitmap, EWAHCompressedBitmap.and( ewahBitmap1, ewahBitmap2, ewahBitmap3)); assertEquals(andJdkBitmap, EWAHCompressedBitmap.and( ewahBitmap3, ewahBitmap2, ewahBitmap1)); Assert.assertEquals(andJdkBitmap.cardinality(), EWAHCompressedBitmap.andCardinality(ewahBitmap1, ewahBitmap2, ewahBitmap3)); } // AND NOT { final EWAHCompressedBitmap andNotEwahBitmap = ewahBitmap1 .andNot(ewahBitmap2); final BitSet andNotJdkBitmap = (BitSet) jdkBitmap1.clone(); andNotJdkBitmap.andNot(jdkBitmap2); assertEquals(andNotJdkBitmap, andNotEwahBitmap); } // AND NOT { final EWAHCompressedBitmap andNotEwahBitmap = ewahBitmap2 .andNot(ewahBitmap1); final BitSet andNotJdkBitmap = (BitSet) jdkBitmap2.clone(); andNotJdkBitmap.andNot(jdkBitmap1); assertEquals(andNotJdkBitmap, andNotEwahBitmap); } // OR { final EWAHCompressedBitmap orEwahBitmap = ewahBitmap1 .or(ewahBitmap2); final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); orJdkBitmap.or(jdkBitmap2); assertEquals(orJdkBitmap, orEwahBitmap); assertEquals(orJdkBitmap, EWAHCompressedBitmap.or(ewahBitmap1, ewahBitmap2)); Assert.assertEquals(orEwahBitmap.cardinality(), ewahBitmap1.orCardinality(ewahBitmap2)); } // OR { final EWAHCompressedBitmap orEwahBitmap = ewahBitmap2 .or(ewahBitmap1); final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); orJdkBitmap.or(jdkBitmap2); assertEquals(orJdkBitmap, orEwahBitmap); } // MULTI OR { final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); orJdkBitmap.or(jdkBitmap2); orJdkBitmap.or(jdkBitmap3); assertEquals(orJdkBitmap, EWAHCompressedBitmap.or(ewahBitmap1, ewahBitmap2, ewahBitmap3)); assertEquals(orJdkBitmap, EWAHCompressedBitmap.or(ewahBitmap3, ewahBitmap2, ewahBitmap1)); Assert.assertEquals(orJdkBitmap.cardinality(), EWAHCompressedBitmap.orCardinality(ewahBitmap1, ewahBitmap2, ewahBitmap3)); } } } /** * Pseudo-non-deterministic test inspired by Federico Fissore. 
* * @param length * the number of set bits in a bitmap */ public static void shouldSetBits(int length) { System.out.println("testing shouldSetBits " + length); int[] bitsToSet = createSortedIntArrayOfBitsToSet(length, 434222); EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); System.out.println(" ... setting " + bitsToSet.length + " values"); for (int i : bitsToSet) { ewah.set(i); } System.out.println(" ... verifying " + bitsToSet.length + " values"); equal(ewah.iterator(), bitsToSet); System.out.println(" ... checking cardinality"); Assert.assertEquals(bitsToSet.length, ewah.cardinality()); } @Test public void testSizeInBits1() { EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); bitmap.setSizeInBits(1, false); bitmap.not(); Assert.assertEquals(1, bitmap.cardinality()); } @Test public void testHasNextSafe() { EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); bitmap.set(0); IntIterator it = bitmap.intIterator(); Assert.assertTrue(it.hasNext()); Assert.assertEquals(0, it.next()); } @Test public void testHasNextSafe2() { EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); bitmap.set(0); IntIterator it = bitmap.intIterator(); Assert.assertEquals(0, it.next()); } @Test public void testInfiniteLoop() { System.out.println("Testing for an infinite loop"); EWAHCompressedBitmap b1 = new EWAHCompressedBitmap(); EWAHCompressedBitmap b2 = new EWAHCompressedBitmap(); EWAHCompressedBitmap b3 = new EWAHCompressedBitmap(); b3.setSizeInBits(5,false); b1.set(2); b2.set(4); EWAHCompressedBitmap.and(b1, b2, b3); EWAHCompressedBitmap.or(b1, b2, b3); } @Test public void testSizeInBits2() { EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); bitmap.setSizeInBits(1, true); bitmap.not(); Assert.assertEquals(0, bitmap.cardinality()); } private static void assertAndEquals(EWAHCompressedBitmap... 
bitmaps) { EWAHCompressedBitmap expected = bitmaps[0]; for (int i = 1; i < bitmaps.length; i++) { expected = expected.and(bitmaps[i]); } Assert.assertTrue(expected.equals(EWAHCompressedBitmap.and(bitmaps))); } private static void assertEquals(EWAHCompressedBitmap expected, EWAHCompressedBitmap actual) { Assert.assertEquals(expected.sizeInBits(), actual.sizeInBits()); assertEqualsPositions(expected, actual); } private static void assertOrEquals(EWAHCompressedBitmap... bitmaps) { EWAHCompressedBitmap expected = bitmaps[0]; for (int i = 1; i < bitmaps.length; i++) { expected = expected.or(bitmaps[i]); } assertEquals(expected, EWAHCompressedBitmap.or(bitmaps)); } /** * Extracted. * * @param bits * the bits * @return the integer */ private static Integer extracted(final Iterator bits) { return bits.next(); } private static void testSetSizeInBits(int size, int nextBit) { EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); bitmap.setSizeInBits(size, false); bitmap.set(nextBit); BitSet jdkBitmap = new BitSet(); jdkBitmap.set(nextBit); assertEquals(jdkBitmap, bitmap); } /** * Assess equality between an uncompressed bitmap and a compressed one, part * of a test contributed by Marc Polizzi * * @param jdkBitmap * the uncompressed bitmap * @param ewahBitmap * the compressed bitmap */ static void assertCardinality(BitSet jdkBitmap, EWAHCompressedBitmap ewahBitmap) { final int c1 = jdkBitmap.cardinality(); final int c2 = ewahBitmap.cardinality(); Assert.assertEquals(c1, c2); } /** * Assess equality between an uncompressed bitmap and a compressed one, part * of a test contributed by Marc Polizzi. 
* * @param jdkBitmap * the uncompressed bitmap * @param ewahBitmap * the compressed bitmap */ static void assertEquals(BitSet jdkBitmap, EWAHCompressedBitmap ewahBitmap) { assertEqualsIterator(jdkBitmap, ewahBitmap); assertEqualsPositions(jdkBitmap, ewahBitmap); assertCardinality(jdkBitmap, ewahBitmap); } static void assertEquals(int[] v, List p) { assertEquals(p, v); } static void assertEquals(List p, int[] v) { if (v.length != p.size()) throw new RuntimeException("Different lengths " + v.length + " " + p.size()); for (int k = 0; k < v.length; ++k) if (v[k] != p.get(k).intValue()) throw new RuntimeException("expected equal at " + k + " " + v[k] + " " + p.get(k)); } // /** * Assess equality between an uncompressed bitmap and a compressed one, part * of a test contributed by Marc Polizzi * * @param jdkBitmap * the jdk bitmap * @param ewahBitmap * the ewah bitmap */ static void assertEqualsIterator(BitSet jdkBitmap, EWAHCompressedBitmap ewahBitmap) { final Vector positions = new Vector(); final Iterator bits = ewahBitmap.iterator(); while (bits.hasNext()) { final int bit = extracted(bits).intValue(); Assert.assertTrue(jdkBitmap.get(bit)); positions.add(new Integer(bit)); } for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap .nextSetBit(pos + 1)) { if (!positions.contains(new Integer(pos))) { throw new RuntimeException( "iterator: bitset got different bits"); } } } // part of a test contributed by Marc Polizzi /** * Assert equals positions. 
* * @param jdkBitmap * the jdk bitmap * @param ewahBitmap * the ewah bitmap */ static void assertEqualsPositions(BitSet jdkBitmap, EWAHCompressedBitmap ewahBitmap) { final List positions = ewahBitmap.getPositions(); for (int position : positions) { if (!jdkBitmap.get(position)) { throw new RuntimeException( "positions: bitset got different bits"); } } for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap .nextSetBit(pos + 1)) { if (!positions.contains(new Integer(pos))) { throw new RuntimeException( "positions: bitset got different bits"); } } // we check again final int[] fastpositions = ewahBitmap.toArray(); for (int position : fastpositions) { if (!jdkBitmap.get(position)) { throw new RuntimeException( "positions: bitset got different bits with toArray"); } } for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap .nextSetBit(pos + 1)) { int index = Arrays.binarySearch(fastpositions, pos); if (index < 0) throw new RuntimeException( "positions: bitset got different bits with toArray"); if (fastpositions[index] != pos) throw new RuntimeException( "positions: bitset got different bits with toArray"); } } /** * Assert equals positions. 
* * @param ewahBitmap1 * the ewah bitmap1 * @param ewahBitmap2 * the ewah bitmap2 */ static void assertEqualsPositions(EWAHCompressedBitmap ewahBitmap1, EWAHCompressedBitmap ewahBitmap2) { final List positions1 = ewahBitmap1.getPositions(); final List positions2 = ewahBitmap2.getPositions(); if (!positions1.equals(positions2)) throw new RuntimeException( "positions: alternative got different bits (two bitmaps)"); // final int[] fastpositions1 = ewahBitmap1.toArray(); assertEquals(fastpositions1, positions1); final int[] fastpositions2 = ewahBitmap2.toArray(); assertEquals(fastpositions2, positions2); if (!Arrays.equals(fastpositions1, fastpositions2)) throw new RuntimeException( "positions: alternative got different bits with toArray but not with getPositions (two bitmaps)"); } /** * Convenience function to assess equality between a compressed bitset and * an uncompressed bitset * * @param x * the compressed bitset/bitmap * @param y * the uncompressed bitset/bitmap */ static void equal(EWAHCompressedBitmap x, BitSet y) { Assert.assertEquals(x.cardinality(), y.cardinality()); for (int i : x.getPositions()) Assert.assertTrue(y.get(i)); } /** * Convenience function to assess equality between an array and an iterator * over Integers * * @param i * the iterator * @param array * the array */ static void equal(Iterator i, int[] array) { int cursor = 0; while (i.hasNext()) { int x = extracted(i).intValue(); int y = array[cursor++]; Assert.assertEquals(x, y); } } /** The Constant MEGA: a large integer. */ private static final int MEGA = 8 * 1024 * 1024; /** The Constant TEST_BS_SIZE: used to represent the size of a large bitmap. 
*/ private static final int TEST_BS_SIZE = 8 * MEGA; } javaewah-JavaEWAH-0.7.9/src/test/java/com/googlecode/javaewah/IntIteratorOverIteratingRLWTest.java000066400000000000000000000101531224043567000330450ustar00rootroot00000000000000package com.googlecode.javaewah; import static org.junit.Assert.*; import org.junit.Test; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Tests for utility class. Sketchy for now. * */ @SuppressWarnings("javadoc") public class IntIteratorOverIteratingRLWTest { @Test // had problems with bitmaps beginning with two consecutive clean runs public void testConsecClean() { System.out .println("testing int iteration, 2 consec clean runs starting with zeros"); EWAHCompressedBitmap e = new EWAHCompressedBitmap(); for (int i = 64; i < 128; ++i) e.set(i); IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( e.getIteratingRLW()); assertTrue(ii.hasNext()); int ctr = 0; while (ii.hasNext()) { ++ctr; ii.next(); } assertEquals(64, ctr); } @Test public void testConsecCleanStartOnes() { System.out .println("testing int iteration, 2 consec clean runs starting with ones"); EWAHCompressedBitmap e = new EWAHCompressedBitmap(); for (int i = 0; i < 2 * 64; ++i) e.set(i); for (int i = 4 * 64; i < 5 * 64; ++i) e.set(i); IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( e.getIteratingRLW()); assertTrue(ii.hasNext()); int ctr = 0; while (ii.hasNext()) { ++ctr; ii.next(); } assertEquals(3 * 64, ctr); } @Test public void testStartDirty() { System.out.println("testing int iteration, no initial runs"); EWAHCompressedBitmap e = new EWAHCompressedBitmap(); for (int i = 1; i < 2 * 64; ++i) e.set(i); for (int i = 4 * 64; i < 5 * 64; ++i) e.set(i); IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( e.getIteratingRLW()); assertTrue(ii.hasNext()); int ctr = 0; while (ii.hasNext()) { ++ctr; 
ii.next(); } assertEquals(3 * 64 - 1, ctr); } @Test public void testEmpty() { System.out.println("testing int iteration over empty bitmap"); EWAHCompressedBitmap e = new EWAHCompressedBitmap(); IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( e.getIteratingRLW()); assertFalse(ii.hasNext()); } @Test public void testRandomish() { EWAHCompressedBitmap e = new EWAHCompressedBitmap(); int upperlimit = 100000; for (int i = 0; i < upperlimit; ++i) { double probabilityOfOne = i / (double) (upperlimit / 2); if (probabilityOfOne > 1.0) probabilityOfOne = 1.0; if (Math.random() < probabilityOfOne) { e.set(i); } } IntIteratorOverIteratingRLW ii = new IntIteratorOverIteratingRLW( e.getIteratingRLW()); int ctr = 0; while (ii.hasNext()) { ++ctr; ii.next(); } assertEquals(e.cardinality(), ctr); System.out .println("checking int iteration over a var density bitset of size " + e.cardinality()); } } javaewah-JavaEWAH-0.7.9/src/test/java/com/googlecode/javaewah/IteratorAggregationTest.java000066400000000000000000000073621224043567000314620ustar00rootroot00000000000000package com.googlecode.javaewah; import static org.junit.Assert.*; import java.util.Iterator; import org.junit.Test; import com.googlecode.javaewah.benchmark.ClusteredDataGenerator; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Tests specifically for iterators. 
* */ public class IteratorAggregationTest { /** * @param N Number of bitmaps to generate in each set * @param nbr parameter determining the size of the arrays (in a log scale) * @return an iterator over sets of bitmaps */ public static Iterator getCollections(final int N, final int nbr) { final ClusteredDataGenerator cdg = new ClusteredDataGenerator(123); return new Iterator() { int sparsity = 1; @Override public boolean hasNext() { return this.sparsity < 5; } @Override public EWAHCompressedBitmap[] next() { int[][] data = new int[N][]; int Max = (1 << (nbr + this.sparsity)); for (int k = 0; k < N; ++k) data[k] = cdg.generateClustered(1 << nbr, Max); EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; for (int k = 0; k < N; ++k) { ewah[k] = new EWAHCompressedBitmap(); for (int x = 0; x < data[k].length; ++x) { ewah[k].set(data[k][x]); } data[k] = null; } this.sparsity += 3; return ewah; } @Override public void remove() { // unimplemented } }; } /** * */ @Test public void testAnd() { for (int N = 1; N < 10; ++N) { System.out.println("testAnd N = " + N); Iterator i = getCollections(N,3); while (i.hasNext()) { EWAHCompressedBitmap[] x = i.next(); EWAHCompressedBitmap tanswer = EWAHCompressedBitmap.and(x); EWAHCompressedBitmap x1 = IteratorUtil .materialize(IteratorAggregation.bufferedand(IteratorUtil .toIterators(x))); assertTrue(x1.equals(tanswer)); } System.gc(); } } /** * */ @Test public void testOr() { for (int N = 1; N < 10; ++N) { System.out.println("testOr N = " + N); Iterator i = getCollections(N,3); while (i.hasNext()) { EWAHCompressedBitmap[] x = i.next(); EWAHCompressedBitmap tanswer = EWAHCompressedBitmap.or(x); EWAHCompressedBitmap x1 = IteratorUtil .materialize(IteratorAggregation.bufferedor(IteratorUtil .toIterators(x))); assertTrue(x1.equals(tanswer)); } System.gc(); } } /** * */ @SuppressWarnings("deprecation") @Test public void testWideOr() { for (int nbr = 3; nbr <= 24; nbr += 3) { for (int N = 100; N < 1000; N += 100) { 
System.out.println("testWideOr N = " + N); Iterator i = getCollections(N, 3); while (i.hasNext()) { EWAHCompressedBitmap[] x = i.next(); EWAHCompressedBitmap tanswer = EWAHCompressedBitmap.or(x); EWAHCompressedBitmap container = new EWAHCompressedBitmap(); FastAggregation.legacy_orWithContainer(container, x); assertTrue(container.equals(tanswer)); EWAHCompressedBitmap x1 = IteratorUtil .materialize(IteratorAggregation .bufferedor(IteratorUtil.toIterators(x))); assertTrue(x1.equals(tanswer)); } System.gc(); } } } /** * */ @Test public void testXor() { System.out.println("testXor "); Iterator i = getCollections(2,3); while (i.hasNext()) { EWAHCompressedBitmap[] x = i.next(); EWAHCompressedBitmap tanswer = x[0].xor(x[1]); EWAHCompressedBitmap x1 = IteratorUtil .materialize(IteratorAggregation.bufferedxor( x[0].getIteratingRLW(), x[1].getIteratingRLW())); assertTrue(x1.equals(tanswer)); } System.gc(); } } javaewah-JavaEWAH-0.7.9/src/test/java/com/googlecode/javaewah32/000077500000000000000000000000001224043567000241535ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/test/java/com/googlecode/javaewah32/EWAHCompressedBitmap32Test.java000066400000000000000000001313551224043567000317410ustar00rootroot00000000000000package com.googlecode.javaewah32; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ import org.junit.Test; import com.googlecode.javaewah.FastAggregation; import com.googlecode.javaewah.IntIterator; import java.util.*; import java.io.*; import junit.framework.Assert; /** * This class is used for basic unit testing. 
*/ @SuppressWarnings("javadoc") public class EWAHCompressedBitmap32Test { @Test public void testGet() { for (int gap = 29; gap < 10000; gap *= 10) { EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); for (int k = 0; k < 100; ++k) x.set(k * gap); for (int k = 0; k < 100 * gap; ++k) if (x.get(k)) { if (k % gap != 0) throw new RuntimeException( "spotted an extra set bit at " + k + " gap = " + gap); } else if (k % gap == 0) throw new RuntimeException( "missed a set bit " + k + " gap = " + gap); } } @SuppressWarnings({ "deprecation", "boxing" }) @Test public void OKaserBugReportJuly2013() { System.out.println("testing OKaserBugReportJuly2013"); int[][] data = { {}, { 5, 6, 7, 8, 9 }, { 1 }, { 2 }, { 2, 5, 7 }, { 1 }, { 2 }, { 1, 6, 9 }, { 1, 3, 4, 6, 8, 9 }, { 1, 3, 4, 6, 8, 9 }, { 1, 3, 6, 8, 9 }, { 2, 5, 7 }, { 2, 5, 7 }, { 1, 3, 9 }, { 3, 8, 9 } }; EWAHCompressedBitmap32[] toBeOred = new EWAHCompressedBitmap32[data.length]; Set bruteForceAnswer = new HashSet(); for (int i = 0; i < toBeOred.length; ++i) { toBeOred[i] = new EWAHCompressedBitmap32(); for (int j : data[i]) { toBeOred[i].set(j); bruteForceAnswer.add(j); } toBeOred[i].setSizeInBits(1000,false); } long rightcard = bruteForceAnswer.size(); EWAHCompressedBitmap32 foo = new EWAHCompressedBitmap32(); FastAggregation32.legacy_orWithContainer(foo, toBeOred); Assert.assertEquals(rightcard, foo.cardinality()); EWAHCompressedBitmap32 e1 = FastAggregation.or(toBeOred); Assert.assertEquals(rightcard, e1.cardinality()); EWAHCompressedBitmap32 e2 = FastAggregation32.bufferedor(65536, toBeOred); Assert.assertEquals(rightcard, e2.cardinality()); } @Test public void testSizeInBitsWithAnd() { System.out.println("testing SizeInBitsWithAnd"); EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); a.set(1); a.set(2); a.set(3); b.set(3); b.set(4); b.set(5); a.setSizeInBits(10); b.setSizeInBits(10); EWAHCompressedBitmap32 and = a.and(b); Assert.assertEquals(10, 
and.sizeInBits()); EWAHCompressedBitmap32 and2 = EWAHCompressedBitmap32.and(a,b); Assert.assertEquals(10, and2.sizeInBits()); } @Test public void testSizeInBitsWithAndNot() { System.out.println("testing SizeInBitsWithAndNot"); EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); a.set(1); a.set(2); a.set(3); b.set(3); b.set(4); b.set(5); a.setSizeInBits(10); b.setSizeInBits(10); EWAHCompressedBitmap32 and = a.andNot(b); Assert.assertEquals(10, and.sizeInBits()); } @Test public void testSizeInBitsWithOr() { System.out.println("testing SizeInBitsWithOr"); EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); a.set(1); a.set(2); a.set(3); b.set(3); b.set(4); b.set(5); a.setSizeInBits(10); b.setSizeInBits(10); EWAHCompressedBitmap32 or = a.or(b); Assert.assertEquals(10, or.sizeInBits()); EWAHCompressedBitmap32 or2 = EWAHCompressedBitmap32.or(a,b); Assert.assertEquals(10, or2.sizeInBits()); } @Test public void testSizeInBitsWithXor() { System.out.println("testing SizeInBitsWithXor"); EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); a.set(1); a.set(2); a.set(3); b.set(3); b.set(4); b.set(5); a.setSizeInBits(10); b.setSizeInBits(10); EWAHCompressedBitmap32 xor = a.xor(b); Assert.assertEquals(10, xor.sizeInBits()); EWAHCompressedBitmap32 xor2 = EWAHCompressedBitmap32.xor(a,b); Assert.assertEquals(10, xor2.sizeInBits()); } @Test public void testDebugSetSizeInBitsTest() { System.out.println("testing DebugSetSizeInBits"); EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); b.set(4); b.setSizeInBits(6, true); List positions = b.getPositions(); Assert.assertEquals(2, positions.size()); Assert.assertEquals(Integer.valueOf(4), positions.get(0)); Assert.assertEquals(Integer.valueOf(5), positions.get(1)); Iterator iterator = b.iterator(); Assert.assertTrue(iterator.hasNext()); 
Assert.assertEquals(Integer.valueOf(4), iterator.next()); Assert.assertTrue(iterator.hasNext()); Assert.assertEquals(Integer.valueOf(5), iterator.next()); Assert.assertFalse(iterator.hasNext()); IntIterator intIterator = b.intIterator(); Assert.assertTrue(intIterator.hasNext()); Assert.assertEquals(4, intIterator.next()); Assert.assertTrue(intIterator.hasNext()); Assert.assertEquals(5, intIterator.next()); Assert.assertFalse(intIterator.hasNext()); } /** * Created: 2/4/11 6:03 PM By: Arnon Moscona. */ @Test public void EwahIteratorProblem() { System.out.println("testing ArnonMoscona"); EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); for (int i = 9434560; i <= 9435159; i++) { bitmap.set(i); } IntIterator iterator = bitmap.intIterator(); List v = bitmap.getPositions(); int[] array = bitmap.toArray(); for (int k = 0; k < v.size(); ++k) { Assert.assertTrue(array[k] == v.get(k).intValue()); Assert.assertTrue(iterator.hasNext()); final int ival = iterator.next(); final int vval = v.get(k).intValue(); Assert.assertTrue(ival == vval); } Assert.assertTrue(!iterator.hasNext()); // for (int k = 2; k <= 1024; k *= 2) { int[] bitsToSet = createSortedIntArrayOfBitsToSet(k, 434455 + 5 * k); EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); for (int i : bitsToSet) { ewah.set(i); } equal(ewah.iterator(), bitsToSet); } } /** * Test submitted by Gregory Ssi-Yan-Kai */ @Test public void SsiYanKaiTest() { System.out.println("testing SsiYanKaiTest"); EWAHCompressedBitmap32 a = EWAHCompressedBitmap32.bitmapOf(39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, 40000, 
40001, 40002, 40003, 40004, 40005, 40006, 40007, 40008, 40009, 40010, 40011, 40012, 40013, 40014, 40015, 40016, 40017, 40018, 40019, 40020, 40021, 40022, 40023, 40024, 40025, 40026, 40027, 40028, 40029, 40030, 40031, 40032, 40033, 40034, 40035, 40036, 40037, 40038, 40039, 40040, 40041, 40042, 40043, 40044, 40045, 40046, 40047, 40048, 40049, 40050, 40051, 40052, 40053, 40054, 40055, 40056, 40057, 40058, 40059, 40060, 40061, 40062, 40063, 40064, 40065, 40066, 40067, 40068, 40069, 40070, 40071, 40072, 40073, 40074, 40075, 40076, 40077, 40078, 40079, 40080, 40081, 40082, 40083, 40084, 40085, 40086, 40087, 40088, 40089, 40090, 40091, 40092, 40093, 40094, 40095, 40096, 40097, 40098, 40099, 40100); EWAHCompressedBitmap32 b = EWAHCompressedBitmap32.bitmapOf(39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944, 39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954, 39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964, 39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974, 39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984, 39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994, 39995, 39996, 39997, 39998, 39999, 270000); LinkedHashSet aPositions = new LinkedHashSet( a.getPositions()); int intersection = 0; EWAHCompressedBitmap32 inter = new EWAHCompressedBitmap32(); LinkedHashSet bPositions = new LinkedHashSet( b.getPositions()); for (Integer integer : bPositions) { if (aPositions.contains(integer)) { inter.set(integer.intValue()); ++intersection; } } EWAHCompressedBitmap32 and2 = a.and(b); if (!and2.equals(inter)) throw new RuntimeException("intersections don't match"); if (intersection != and2.cardinality()) throw new RuntimeException("cardinalities don't match"); } /** * Test inspired by William Habermaas. 
*/
    @Test
    public void habermaasTest() {
        System.out.println("testing habermaasTest");
        BitSet bitsetaa = new BitSet();
        EWAHCompressedBitmap32 aa = new EWAHCompressedBitmap32();
        int[] val = { 55400, 1000000, 1000128 };
        for (int k = 0; k < val.length; ++k) {
            aa.set(val[k]);
            bitsetaa.set(val[k]);
        }
        equal(aa, bitsetaa);
        BitSet bitsetab = new BitSet();
        EWAHCompressedBitmap32 ab = new EWAHCompressedBitmap32();
        for (int i = 4096; i < (4096 + 5); i++) {
            ab.set(i);
            bitsetab.set(i);
        }
        ab.set(99000);
        bitsetab.set(99000);
        ab.set(1000130);
        bitsetab.set(1000130);
        equal(ab, bitsetab);
        EWAHCompressedBitmap32 bb = aa.or(ab);
        EWAHCompressedBitmap32 bbAnd = aa.and(ab);
        try {
            // aa AND ab must equal aa AND-NOT (NOT ab)
            EWAHCompressedBitmap32 abnot = ab.clone();
            abnot.not();
            EWAHCompressedBitmap32 bbAnd2 = aa.andNot(abnot);
            assertEquals(bbAnd2, bbAnd);
        } catch (CloneNotSupportedException e) {
            // A failed clone used to be printed and ignored, which skipped
            // the check above; fail the test instead.
            throw new IllegalStateException("could not clone the bitmap", e);
        }
        BitSet bitsetbb = (BitSet) bitsetaa.clone();
        bitsetbb.or(bitsetab);
        BitSet bitsetbbAnd = (BitSet) bitsetaa.clone();
        bitsetbbAnd.and(bitsetab);
        equal(bbAnd, bitsetbbAnd);
        equal(bb, bitsetbb);
    }

    /**
     * A bit appended to the result of an AND must land at the same position
     * as when it is appended to the left operand.
     */
    @Test
    public void testAndResultAppend() {
        System.out.println("testing AndResultAppend");
        EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32();
        bitmap1.set(35);
        EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32();
        bitmap2.set(35);
        bitmap2.set(130);

        EWAHCompressedBitmap32 resultBitmap = bitmap1.and(bitmap2);
        resultBitmap.set(131);

        bitmap1.set(131);
        assertEquals(bitmap1, resultBitmap);
    }

    /**
     * Test cardinality.
*/ @Test public void testCardinality() { System.out.println("testing EWAH cardinality"); EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); bitmap.set(Integer.MAX_VALUE - 32); // System.out.format("Total Items %d\n", bitmap.cardinality()); Assert.assertTrue(bitmap.cardinality() == 1); } /** * Test clear function */ @Test public void testClear() { System.out.println("testing Clear"); EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); bitmap.set(5); bitmap.clear(); bitmap.set(7); Assert.assertTrue(1 == bitmap.cardinality()); Assert.assertTrue(1 == bitmap.getPositions().size()); Assert.assertTrue(1 == bitmap.toArray().length); Assert.assertTrue(7 == bitmap.getPositions().get(0).intValue()); Assert.assertTrue(7 == bitmap.toArray()[0]); bitmap.clear(); bitmap.set(5000); Assert.assertTrue(1 == bitmap.cardinality()); Assert.assertTrue(1 == bitmap.getPositions().size()); Assert.assertTrue(1 == bitmap.toArray().length); Assert.assertTrue(5000 == bitmap.getPositions().get(0).intValue()); bitmap.set(5001); bitmap.set(5005); bitmap.set(5100); bitmap.set(5500); bitmap.clear(); bitmap.set(5); bitmap.set(7); bitmap.set(1000); bitmap.set(1001); Assert.assertTrue(4 == bitmap.cardinality()); List positions = bitmap.getPositions(); Assert.assertTrue(4 == positions.size()); Assert.assertTrue(5 == positions.get(0).intValue()); Assert.assertTrue(7 == positions.get(1).intValue()); Assert.assertTrue(1000 == positions.get(2).intValue()); Assert.assertTrue(1001 == positions.get(3).intValue()); } /** * Test ewah compressed bitmap. 
*/
    @Test
    public void testEWAHCompressedBitmap() {
        System.out.println("testing EWAH");
        int zero = 0;
        int specialval = 1 | (1 << 4) | (1 << 31);
        int notzero = ~zero;
        EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32();
        myarray1.add(zero);
        myarray1.add(zero);
        myarray1.add(zero);
        myarray1.add(specialval);
        myarray1.add(specialval);
        myarray1.add(notzero);
        myarray1.add(zero);
        // two words with 3 set bits each plus one all-ones word
        Assert.assertEquals(myarray1.getPositions().size(), 6 + 32);
        EWAHCompressedBitmap32 myarray2 = new EWAHCompressedBitmap32();
        myarray2.add(zero);
        myarray2.add(specialval);
        myarray2.add(specialval);
        myarray2.add(notzero);
        myarray2.add(zero);
        myarray2.add(zero);
        myarray2.add(zero);
        Assert.assertEquals(myarray2.getPositions().size(), 6 + 32);
        List<Integer> data1 = myarray1.getPositions();
        List<Integer> data2 = myarray2.getPositions();
        // reference union: de-duplicate through a set, then sort
        Vector<Integer> logicalor = new Vector<Integer>();
        {
            HashSet<Integer> tmp = new HashSet<Integer>();
            tmp.addAll(data1);
            tmp.addAll(data2);
            logicalor.addAll(tmp);
        }
        Collections.sort(logicalor);
        // reference intersection
        Vector<Integer> logicaland = new Vector<Integer>();
        logicaland.addAll(data1);
        logicaland.retainAll(data2);
        Collections.sort(logicaland);
        EWAHCompressedBitmap32 arrayand = myarray1.and(myarray2);
        Assert.assertTrue(arrayand.getPositions().equals(logicaland));
        EWAHCompressedBitmap32 arrayor = myarray1.or(myarray2);
        Assert.assertTrue(arrayor.getPositions().equals(logicalor));
        EWAHCompressedBitmap32 arrayandbis = myarray2.and(myarray1);
        Assert.assertTrue(arrayandbis.getPositions().equals(logicaland));
        EWAHCompressedBitmap32 arrayorbis = myarray2.or(myarray1);
        Assert.assertTrue(arrayorbis.getPositions().equals(logicalor));
        // rebuilding a bitmap from its positions or from its iterator must
        // give back the same positions
        EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32();
        for (Integer i : myarray1.getPositions()) {
            x.set(i.intValue());
        }
        Assert.assertTrue(x.getPositions().equals(myarray1.getPositions()));
        x = new EWAHCompressedBitmap32();
        for (Integer i : myarray2.getPositions()) {
            x.set(i.intValue());
        }
        Assert.assertTrue(x.getPositions().equals(myarray2.getPositions()));
        x = new EWAHCompressedBitmap32();
        for (Iterator<Integer> k = myarray1.iterator(); k.hasNext();) {
            x.set(extracted(k).intValue());
        }
        Assert.assertTrue(x.getPositions().equals(myarray1.getPositions()));
        x = new EWAHCompressedBitmap32();
        for (Iterator<Integer> k = myarray2.iterator(); k.hasNext();) {
            x.set(extracted(k).intValue());
        }
        Assert.assertTrue(x.getPositions().equals(myarray2.getPositions()));
    }

    /**
     * Test externalization: a bitmap written with {@code writeExternal} and
     * read back with {@code readExternal} must hold the same positions.
     *
     * @throws IOException
     *             Signals that an I/O exception has occurred.
     */
    @Test
    public void testExternalization() throws IOException {
        System.out.println("testing EWAH externalization");
        EWAHCompressedBitmap32 ewcb = new EWAHCompressedBitmap32();
        int[] val = { 5, 4400, 44600, 55400, 1000000 };
        for (int k = 0; k < val.length; ++k) {
            ewcb.set(val[k]);
        }
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        ObjectOutputStream oo = new ObjectOutputStream(bos);
        try {
            ewcb.writeExternal(oo);
        } finally {
            // closing also flushes the buffered object stream into bos
            oo.close();
        }
        ewcb = new EWAHCompressedBitmap32();
        ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
        ObjectInputStream oi = new ObjectInputStream(bis);
        try {
            ewcb.readExternal(oi);
        } finally {
            oi.close();
        }
        List<Integer> result = ewcb.getPositions();
        Assert.assertEquals(val.length, result.size());
        for (int k = 0; k < val.length; ++k) {
            Assert.assertEquals(val[k], result.get(k).intValue());
        }
    }

    /**
     * Sets 1024 consecutive bits ending just below
     * {@code Integer.MAX_VALUE - 32} and checks cardinality and first element
     * after every insertion.
     */
    @Test
    public void testExtremeRange() {
        System.out.println("testing EWAH at its extreme range");
        EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32();
        int N = 1024;
        for (int i = 0; i < N; ++i) {
            myarray1.set(Integer.MAX_VALUE - 32 - N + i);
            Assert.assertEquals(i + 1, myarray1.cardinality());
            int[] val = myarray1.toArray();
            Assert.assertEquals(Integer.MAX_VALUE - 32 - N, val[0]);
        }
    }

    /**
     * Test the intersects method
     */
    @Test
    public void testIntersectsMethod() {
        System.out.println("testing Intersets Bug");
        EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32();
        bitmap.set(1);
        EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32();
        bitmap2.set(1);
        bitmap2.set(11);
        bitmap2.set(111);
        bitmap2.set(1111111);
        bitmap2.set(11111111);
        Assert.assertTrue(bitmap.intersects(bitmap2));
        Assert.assertTrue(bitmap2.intersects(bitmap));

        EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32();
        bitmap3.set(101);
        EWAHCompressedBitmap32 bitmap4 = new EWAHCompressedBitmap32();
        for (int i = 0; i < 100; i++) {
            bitmap4.set(i);
        }
        Assert.assertFalse(bitmap3.intersects(bitmap4));
        Assert.assertFalse(bitmap4.intersects(bitmap3));

        EWAHCompressedBitmap32 bitmap5 = new EWAHCompressedBitmap32();
        bitmap5.set(0);
        bitmap5.set(10);
        bitmap5.set(20);
        EWAHCompressedBitmap32 bitmap6 = new EWAHCompressedBitmap32();
        bitmap6.set(1);
        bitmap6.set(11);
        bitmap6.set(21);
        bitmap6.set(1111111);
        bitmap6.set(11111111);
        Assert.assertFalse(bitmap5.intersects(bitmap6));
        Assert.assertFalse(bitmap6.intersects(bitmap5));

        // one shared bit is enough to intersect
        bitmap5.set(21);
        Assert.assertTrue(bitmap5.intersects(bitmap6));
        Assert.assertTrue(bitmap6.intersects(bitmap5));

        EWAHCompressedBitmap32 bitmap7 = new EWAHCompressedBitmap32();
        bitmap7.set(1);
        bitmap7.set(10);
        bitmap7.set(20);
        bitmap7.set(1111111);
        bitmap7.set(11111111);
        EWAHCompressedBitmap32 bitmap8 = new EWAHCompressedBitmap32();
        for (int i = 0; i < 1000; i++) {
            if (i != 1 && i != 10 && i != 20) {
                bitmap8.set(i);
            }
        }
        Assert.assertFalse(bitmap7.intersects(bitmap8));
        Assert.assertFalse(bitmap8.intersects(bitmap7));
    }

    /**
     * as per renaud.delbru, Feb 12, 2009 this might throw an error out of bound
     * exception.
     */
    @Test
    public void testLargeEWAHCompressedBitmap() {
        System.out.println("testing EWAH over a large array");
        EWAHCompressedBitmap32 myarray1 = new EWAHCompressedBitmap32();
        int N = 11000000;
        for (int i = 0; i < N; ++i) {
            myarray1.set(i);
        }
        Assert.assertEquals(N, myarray1.sizeInBits());
    }

    /**
     * Test massive and.
*/ @Test public void testMassiveAnd() { System.out.println("testing massive logical and"); EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[1024]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap32(); for (int k = 0; k < 30000; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); } EWAHCompressedBitmap32 answer = ewah[0]; for (int k = 1; k < ewah.length; ++k) answer = answer.and(ewah[k]); // result should be empty if (answer.getPositions().size() != 0) System.out.println(answer.toDebugString()); Assert.assertTrue(answer.getPositions().size() == 0); Assert.assertTrue(EWAHCompressedBitmap32.and(ewah).getPositions() .size() == 0); } /** * Test massive and not. */ @Test public void testMassiveAndNot() { System.out.println("testing massive and not"); final int N = 1024; EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap32(); for (int k = 0; k < 30000; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); } EWAHCompressedBitmap32 answer = ewah[0]; EWAHCompressedBitmap32 answer2 = ewah[0]; for (int k = 1; k < ewah.length; ++k) { answer = answer.andNot(ewah[k]); EWAHCompressedBitmap32 copy = null; try { copy = ewah[k].clone(); copy.not(); answer2.and(copy); assertEqualsPositions(answer, answer2); } catch (CloneNotSupportedException e) { e.printStackTrace(); } } } @Test public void testsetSizeInBits() { System.out.println("testing setSizeInBits"); for (int k = 0; k < 4096; ++k) { EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); ewah.setSizeInBits(k,false); Assert.assertEquals(ewah.sizeinbits, k); Assert.assertEquals(ewah.cardinality(), 0); EWAHCompressedBitmap32 ewah2 = new EWAHCompressedBitmap32(); ewah2.setSizeInBits(k, false); Assert.assertEquals(ewah2.sizeinbits, k); Assert.assertEquals(ewah2.cardinality(), 0); EWAHCompressedBitmap32 ewah3 = new EWAHCompressedBitmap32(); for (int i = 0; i < k; ++i) { ewah3.set(i); } 
Assert.assertEquals(ewah3.sizeinbits, k); Assert.assertEquals(ewah3.cardinality(), k); EWAHCompressedBitmap32 ewah4 = new EWAHCompressedBitmap32(); ewah4.setSizeInBits(k, true); Assert.assertEquals(ewah4.sizeinbits, k); Assert.assertEquals(ewah4.cardinality(), k); } } /** * Test massive or. */ @Test public void testMassiveOr() { System.out .println("testing massive logical or (can take a couple of minutes)"); final int N = 128; for (int howmany = 512; howmany <= 10000; howmany *= 2) { EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; BitSet[] bset = new BitSet[N]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap32(); for (int k = 0; k < bset.length; ++k) bset[k] = new BitSet(); for (int k = 0; k < N; ++k) assertEqualsPositions(bset[k], ewah[k]); for (int k = 0; k < howmany; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); bset[(k + 2 * k * k) % ewah.length].set(k); } for (int k = 0; k < N; ++k) assertEqualsPositions(bset[k], ewah[k]); EWAHCompressedBitmap32 answer = ewah[0]; BitSet bitsetanswer = bset[0]; for (int k = 1; k < ewah.length; ++k) { EWAHCompressedBitmap32 tmp = answer.or(ewah[k]); bitsetanswer.or(bset[k]); answer = tmp; assertEqualsPositions(bitsetanswer, answer); } assertEqualsPositions(bitsetanswer, answer); assertEqualsPositions(bitsetanswer, EWAHCompressedBitmap32.or(ewah)); int k = 0; for (int j : answer) { if (k != j) System.out.println(answer.toDebugString()); Assert.assertEquals(k, j); k += 1; } } } /** * Test massive xor. 
*/ @Test public void testMassiveXOR() { System.out .println("testing massive xor (can take a couple of minutes)"); final int N = 16; EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; BitSet[] bset = new BitSet[N]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap32(); for (int k = 0; k < bset.length; ++k) bset[k] = new BitSet(); for (int k = 0; k < 30000; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); bset[(k + 2 * k * k) % ewah.length].set(k); } EWAHCompressedBitmap32 answer = ewah[0]; BitSet bitsetanswer = bset[0]; for (int k = 1; k < ewah.length; ++k) { answer = answer.xor(ewah[k]); bitsetanswer.xor(bset[k]); assertEqualsPositions(bitsetanswer, answer); } int k = 0; for (int j : answer) { if (k != j) System.out.println(answer.toDebugString()); Assert.assertEquals(k, j); k += 1; } } @Test public void testMultiAnd() { System.out.println("testing MultiAnd"); // test bitmap3 has a literal word while bitmap1/2 have a run of 1 EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); bitmap1.addStreamOfEmptyWords(true, 1000); EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); bitmap2.addStreamOfEmptyWords(true, 2000); EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); bitmap3.set(500); bitmap3.set(502); bitmap3.set(504); assertAndEquals(bitmap1, bitmap2, bitmap3); // equal bitmap1 = new EWAHCompressedBitmap32(); bitmap1.set(35); bitmap2 = new EWAHCompressedBitmap32(); bitmap2.set(35); bitmap3 = new EWAHCompressedBitmap32(); bitmap3.set(35); assertAndEquals(bitmap1, bitmap2, bitmap3); // same number of words for each bitmap3.set(63); assertAndEquals(bitmap1, bitmap2, bitmap3); // one word bigger bitmap3.set(64); assertAndEquals(bitmap1, bitmap2, bitmap3); // two words bigger bitmap3.set(130); assertAndEquals(bitmap1, bitmap2, bitmap3); // test that result can still be appended to EWAHCompressedBitmap32 resultBitmap = EWAHCompressedBitmap32.and( bitmap1, bitmap2, bitmap3); resultBitmap.set(131); 
bitmap1.set(131); assertEquals(bitmap1, resultBitmap); final int N = 128; for (int howmany = 512; howmany <= 10000; howmany *= 2) { EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap32(); for (int k = 0; k < howmany; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); } for (int k = 1; k <= ewah.length; ++k) { EWAHCompressedBitmap32[] shortewah = new EWAHCompressedBitmap32[k]; for (int i = 0; i < k; ++i) shortewah[i] = ewah[i]; assertAndEquals(shortewah); } } } @Test public void testMultiOr() { System.out.println("testing MultiOr"); // test bitmap3 has a literal word while bitmap1/2 have a run of 0 EWAHCompressedBitmap32 bitmap1 = new EWAHCompressedBitmap32(); bitmap1.set(1000); EWAHCompressedBitmap32 bitmap2 = new EWAHCompressedBitmap32(); bitmap2.set(2000); EWAHCompressedBitmap32 bitmap3 = new EWAHCompressedBitmap32(); bitmap3.set(500); bitmap3.set(502); bitmap3.set(504); EWAHCompressedBitmap32 expected = bitmap1.or(bitmap2).or(bitmap3); assertEquals(expected, EWAHCompressedBitmap32.or(bitmap1, bitmap2, bitmap3)); final int N = 128; for (int howmany = 512; howmany <= 10000; howmany *= 2) { EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; for (int k = 0; k < ewah.length; ++k) ewah[k] = new EWAHCompressedBitmap32(); for (int k = 0; k < howmany; ++k) { ewah[(k + 2 * k * k) % ewah.length].set(k); } for (int k = 1; k <= ewah.length; ++k) { EWAHCompressedBitmap32[] shortewah = new EWAHCompressedBitmap32[k]; for (int i = 0; i < k; ++i) shortewah[i] = ewah[i]; assertOrEquals(shortewah); } } } /** * Test not. 
(Based on an idea by Ciaran Jessup)
     */
    @Test
    public void testNot() {
        System.out.println("testing not");
        EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32();
        for (int i = 0; i <= 184; ++i) {
            ewah.set(i);
        }
        Assert.assertEquals(185, ewah.cardinality());
        // Every bit up to the bitmap's size is set, so the complement is empty.
        ewah.not();
        Assert.assertEquals(0, ewah.cardinality());
    }

    /**
     * Checks that or-ing a bitmap with itself, and computing orCardinality
     * against an empty bitmap, both preserve the cardinality. Runs for a dense
     * prefix of every length below 1024 followed by two isolated bits.
     */
    @Test
    public void testOrCardinality() {
        System.out.println("testing Or Cardinality");
        for (int N = 0; N < 1024; ++N) {
            EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32();
            for (int i = 0; i < N; i++) {
                bitmap.set(i);
            }
            bitmap.set(1025);
            bitmap.set(1026);
            Assert.assertEquals(N + 2, bitmap.cardinality());
            EWAHCompressedBitmap32 orbitmap = bitmap.or(bitmap);
            assertEquals(orbitmap, bitmap);
            Assert.assertEquals(N + 2, orbitmap.cardinality());

            final int orCardinality = bitmap
                    .orCardinality(new EWAHCompressedBitmap32());
            if (N + 2 != orCardinality) {
                // Dump diagnostics before the assertion below fails.
                System.out.println("N = " + N);
                System.out.println(bitmap.toDebugString());
                System.out.println("cardinality = " + bitmap.cardinality());
                System.out.println("orCardinality = " + orCardinality);
            }
            Assert.assertEquals(N + 2, orCardinality);
        }
    }

    /**
     * Test sets and gets.
*/
    @Test
    public void testSetGet() {
        System.out.println("testing EWAH set/get");
        EWAHCompressedBitmap32 ewcb = new EWAHCompressedBitmap32();
        int[] val = { 5, 4400, 44600, 55400, 1000000 };
        for (int k = 0; k < val.length; ++k) {
            ewcb.set(val[k]);
        }
        List<Integer> result = ewcb.getPositions();
        Assert.assertEquals(val.length, result.size());
        for (int k = 0; k < val.length; ++k) {
            Assert.assertEquals(val[k], result.get(k).intValue());
        }
    }

    /**
     * Checks that a bitmap produced by and() equals, and hashes like, a bitmap
     * built directly from the same single bit.
     */
    @Test
    public void testHashCode() {
        System.out.println("testing hashCode");
        EWAHCompressedBitmap32 ewcb = EWAHCompressedBitmap32.bitmapOf(50, 70)
                .and(EWAHCompressedBitmap32.bitmapOf(50, 1000));
        Assert.assertEquals(EWAHCompressedBitmap32.bitmapOf(50), ewcb);
        Assert.assertEquals(EWAHCompressedBitmap32.bitmapOf(50).hashCode(),
                ewcb.hashCode());
    }

    /**
     * Checks that a bit can still be set after the bitmap has been extended
     * with setSizeInBits, for several (size, next bit) combinations and for a
     * bitmap that already held a bit before being extended.
     */
    @Test
    public void testSetSizeInBits() {
        System.out.println("testing SetSizeInBits");
        testSetSizeInBits(130, 131);
        testSetSizeInBits(63, 64);
        testSetSizeInBits(64, 65);
        testSetSizeInBits(64, 128);
        testSetSizeInBits(35, 131);
        testSetSizeInBits(130, 400);
        testSetSizeInBits(130, 191);
        testSetSizeInBits(130, 192);

        EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32();
        bitmap.set(31);
        bitmap.setSizeInBits(130, false);
        bitmap.set(131);
        BitSet jdkBitmap = new BitSet();
        jdkBitmap.set(31);
        jdkBitmap.set(131);
        assertEquals(jdkBitmap, bitmap);
    }

    /**
     * Test with parameters: runs shouldSetBits for lengths 2, 16, 128, ...
     * below 2^24, then PolizziTest for several maximum lengths.
     *
     * @throws IOException
     *             Signals that an I/O exception has occurred.
     */
    @Test
    public void testWithParameters() throws IOException {
        System.out
                .println("These tests can run for several minutes. Please be patient.");
        for (int k = 2; k < (1 << 24); k *= 8)
            shouldSetBits(k);
        PolizziTest(64);
        PolizziTest(128);
        PolizziTest(256);
        PolizziTest(2048);
        System.out.println("Your code is probably ok.");
    }

    /**
     * Pseudo-non-deterministic test inspired by S.J.vanSchaik. (Yes,
     * non-deterministic tests are bad, but the test is actually deterministic.)
*/ @Test public void vanSchaikTest() { System.out.println("testing vanSchaikTest (this takes some time)"); final int totalNumBits = 32768; final double odds = 0.9; Random rand = new Random(323232323); for (int t = 0; t < 100; t++) { int numBitsSet = 0; EWAHCompressedBitmap32 cBitMap = new EWAHCompressedBitmap32(); for (int i = 0; i < totalNumBits; i++) { if (rand.nextDouble() < odds) { cBitMap.set(i); numBitsSet++; } } Assert.assertEquals(cBitMap.cardinality(), numBitsSet); } } /** * Function used in a test inspired by Federico Fissore. * * @param size * the number of set bits * @param seed * the random seed * @return the pseudo-random array int[] */ public static int[] createSortedIntArrayOfBitsToSet(int size, int seed) { Random random = new Random(seed); // build raw int array int[] bits = new int[size]; for (int i = 0; i < bits.length; i++) { bits[i] = random.nextInt(TEST_BS_SIZE); } // might generate duplicates Arrays.sort(bits); // first count how many distinct values int counter = 0; int oldx = -1; for (int x : bits) { if (x != oldx) ++counter; oldx = x; } // then construct new array int[] answer = new int[counter]; counter = 0; oldx = -1; for (int x : bits) { if (x != oldx) { answer[counter] = x; ++counter; } oldx = x; } return answer; } /** * Test inspired by Bilal Tayara */ @Test public void TayaraTest() { System.out.println("Tayara test"); for (int offset = 64; offset < (1 << 30); offset *= 2) { EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); EWAHCompressedBitmap32 b = new EWAHCompressedBitmap32(); for (int k = 0; k < 64; ++k) { a.set(offset + k); b.set(offset + k); } if (!a.and(b).equals(a)) throw new RuntimeException("bug"); if (!a.or(b).equals(a)) throw new RuntimeException("bug"); } } @Test public void TestCloneEwahCompressedBitArray() throws CloneNotSupportedException { System.out.println("testing EWAH clone"); EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); a.set(410018); a.set(410019); a.set(410020); a.set(410021); a.set(410022); 
a.set(410023); EWAHCompressedBitmap32 b; b = a.clone(); a.setSizeInBits(487123, false); b.setSizeInBits(487123, false); Assert.assertTrue(a.equals(b)); } /** * a non-deterministic test proposed by Marc Polizzi. * * @param maxlength * the maximum uncompressed size of the bitmap */ public static void PolizziTest(int maxlength) { System.out.println("Polizzi test with max length = " + maxlength); for (int k = 0; k < 10000; ++k) { final Random rnd = new Random(); final EWAHCompressedBitmap32 ewahBitmap1 = new EWAHCompressedBitmap32(); final BitSet jdkBitmap1 = new BitSet(); final EWAHCompressedBitmap32 ewahBitmap2 = new EWAHCompressedBitmap32(); final BitSet jdkBitmap2 = new BitSet(); final EWAHCompressedBitmap32 ewahBitmap3 = new EWAHCompressedBitmap32(); final BitSet jdkBitmap3 = new BitSet(); final int len = rnd.nextInt(maxlength); for (int pos = 0; pos < len; pos++) { // random *** number of bits // set *** if (rnd.nextInt(7) == 0) { // random *** increasing *** values ewahBitmap1.set(pos); jdkBitmap1.set(pos); } if (rnd.nextInt(11) == 0) { // random *** increasing *** values ewahBitmap2.set(pos); jdkBitmap2.set(pos); } if (rnd.nextInt(7) == 0) { // random *** increasing *** values ewahBitmap3.set(pos); jdkBitmap3.set(pos); } } assertEquals(jdkBitmap1, ewahBitmap1); assertEquals(jdkBitmap2, ewahBitmap2); assertEquals(jdkBitmap3, ewahBitmap3); // XOR { final EWAHCompressedBitmap32 xorEwahBitmap = ewahBitmap1 .xor(ewahBitmap2); final BitSet xorJdkBitmap = (BitSet) jdkBitmap1.clone(); xorJdkBitmap.xor(jdkBitmap2); assertEquals(xorJdkBitmap, xorEwahBitmap); } // AND { final EWAHCompressedBitmap32 andEwahBitmap = ewahBitmap1 .and(ewahBitmap2); final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); andJdkBitmap.and(jdkBitmap2); assertEquals(andJdkBitmap, andEwahBitmap); } // AND { final EWAHCompressedBitmap32 andEwahBitmap = ewahBitmap2 .and(ewahBitmap1); final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); andJdkBitmap.and(jdkBitmap2); assertEquals(andJdkBitmap, 
andEwahBitmap); assertEquals(andJdkBitmap, EWAHCompressedBitmap32.and(ewahBitmap1, ewahBitmap2)); } // MULTI AND { final BitSet andJdkBitmap = (BitSet) jdkBitmap1.clone(); andJdkBitmap.and(jdkBitmap2); andJdkBitmap.and(jdkBitmap3); assertEquals(andJdkBitmap, EWAHCompressedBitmap32.and( ewahBitmap1, ewahBitmap2, ewahBitmap3)); assertEquals(andJdkBitmap, EWAHCompressedBitmap32.and( ewahBitmap3, ewahBitmap2, ewahBitmap1)); Assert.assertEquals(andJdkBitmap.cardinality(), EWAHCompressedBitmap32.andCardinality(ewahBitmap1, ewahBitmap2, ewahBitmap3)); } // AND NOT { final EWAHCompressedBitmap32 andNotEwahBitmap = ewahBitmap1 .andNot(ewahBitmap2); final BitSet andNotJdkBitmap = (BitSet) jdkBitmap1.clone(); andNotJdkBitmap.andNot(jdkBitmap2); assertEquals(andNotJdkBitmap, andNotEwahBitmap); } // AND NOT { final EWAHCompressedBitmap32 andNotEwahBitmap = ewahBitmap2 .andNot(ewahBitmap1); final BitSet andNotJdkBitmap = (BitSet) jdkBitmap2.clone(); andNotJdkBitmap.andNot(jdkBitmap1); assertEquals(andNotJdkBitmap, andNotEwahBitmap); } // OR { final EWAHCompressedBitmap32 orEwahBitmap = ewahBitmap1 .or(ewahBitmap2); final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); orJdkBitmap.or(jdkBitmap2); assertEquals(orJdkBitmap, orEwahBitmap); assertEquals(orJdkBitmap, EWAHCompressedBitmap32.or(ewahBitmap1, ewahBitmap2)); Assert.assertEquals(orEwahBitmap.cardinality(), ewahBitmap1.orCardinality(ewahBitmap2)); } // OR { final EWAHCompressedBitmap32 orEwahBitmap = ewahBitmap2 .or(ewahBitmap1); final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); orJdkBitmap.or(jdkBitmap2); assertEquals(orJdkBitmap, orEwahBitmap); } // MULTI OR { final BitSet orJdkBitmap = (BitSet) jdkBitmap1.clone(); orJdkBitmap.or(jdkBitmap2); orJdkBitmap.or(jdkBitmap3); assertEquals(orJdkBitmap, EWAHCompressedBitmap32.or( ewahBitmap1, ewahBitmap2, ewahBitmap3)); assertEquals(orJdkBitmap, EWAHCompressedBitmap32.or( ewahBitmap3, ewahBitmap2, ewahBitmap1)); Assert.assertEquals(orJdkBitmap.cardinality(), 
EWAHCompressedBitmap32.orCardinality(ewahBitmap1, ewahBitmap2, ewahBitmap3)); } } } /** * Pseudo-non-deterministic test inspired by Federico Fissore. * * @param length * the number of set bits in a bitmap */ public static void shouldSetBits(int length) { System.out.println("testing shouldSetBits " + length); int[] bitsToSet = createSortedIntArrayOfBitsToSet(length, 434222); EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); System.out.println(" ... setting " + bitsToSet.length + " values"); for (int i : bitsToSet) { ewah.set(i); } System.out.println(" ... verifying " + bitsToSet.length + " values"); equal(ewah.iterator(), bitsToSet); System.out.println(" ... checking cardinality"); Assert.assertEquals(bitsToSet.length, ewah.cardinality()); } @Test public void testSizeInBits1() { EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); bitmap.setSizeInBits(1, false); bitmap.not(); Assert.assertEquals(1, bitmap.cardinality()); } @Test public void testHasNextSafe() { EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); bitmap.set(0); IntIterator it = bitmap.intIterator(); Assert.assertTrue(it.hasNext()); Assert.assertEquals(0, it.next()); } @Test public void testHasNextSafe2() { EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); bitmap.set(0); IntIterator it = bitmap.intIterator(); Assert.assertEquals(0, it.next()); } @Test public void testInfiniteLoop() { System.out.println("Testing for an infinite loop"); EWAHCompressedBitmap32 b1 = new EWAHCompressedBitmap32(); EWAHCompressedBitmap32 b2 = new EWAHCompressedBitmap32(); EWAHCompressedBitmap32 b3 = new EWAHCompressedBitmap32(); b3.setSizeInBits(5,false); b1.set(2); b2.set(4); EWAHCompressedBitmap32.and(b1, b2, b3); EWAHCompressedBitmap32.or(b1, b2, b3); } @Test public void testSizeInBits2() { EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); bitmap.setSizeInBits(1, true); bitmap.not(); Assert.assertEquals(0, bitmap.cardinality()); } private static void 
assertAndEquals(EWAHCompressedBitmap32... bitmaps) { EWAHCompressedBitmap32 expected = bitmaps[0]; for (int i = 1; i < bitmaps.length; i++) { expected = expected.and(bitmaps[i]); } Assert.assertTrue(expected.equals(EWAHCompressedBitmap32.and(bitmaps))); } private static void assertEquals(EWAHCompressedBitmap32 expected, EWAHCompressedBitmap32 actual) { Assert.assertEquals(expected.sizeInBits(), actual.sizeInBits()); assertEqualsPositions(expected, actual); } private static void assertOrEquals(EWAHCompressedBitmap32... bitmaps) { EWAHCompressedBitmap32 expected = bitmaps[0]; for (int i = 1; i < bitmaps.length; i++) { expected = expected.or(bitmaps[i]); } assertEquals(expected, EWAHCompressedBitmap32.or(bitmaps)); } /** * Extracted. * * @param bits * the bits * @return the integer */ private static Integer extracted(final Iterator bits) { return bits.next(); } private static void testSetSizeInBits(int size, int nextBit) { EWAHCompressedBitmap32 bitmap = new EWAHCompressedBitmap32(); bitmap.setSizeInBits(size, false); bitmap.set(nextBit); BitSet jdkBitmap = new BitSet(); jdkBitmap.set(nextBit); assertEquals(jdkBitmap, bitmap); } /** * Assess equality between an uncompressed bitmap and a compressed one, part * of a test contributed by Marc Polizzi * * @param jdkBitmap * the uncompressed bitmap * @param ewahBitmap * the compressed bitmap */ static void assertCardinality(BitSet jdkBitmap, EWAHCompressedBitmap32 ewahBitmap) { final int c1 = jdkBitmap.cardinality(); final int c2 = ewahBitmap.cardinality(); Assert.assertEquals(c1, c2); } /** * Assess equality between an uncompressed bitmap and a compressed one, part * of a test contributed by Marc Polizzi. 
* * @param jdkBitmap * the uncompressed bitmap * @param ewahBitmap * the compressed bitmap */ static void assertEquals(BitSet jdkBitmap, EWAHCompressedBitmap32 ewahBitmap) { assertEqualsIterator(jdkBitmap, ewahBitmap); assertEqualsPositions(jdkBitmap, ewahBitmap); assertCardinality(jdkBitmap, ewahBitmap); } static void assertEquals(int[] v, List p) { assertEquals(p, v); } static void assertEquals(List p, int[] v) { if (v.length != p.size()) throw new RuntimeException("Different lengths " + v.length + " " + p.size()); for (int k = 0; k < v.length; ++k) if (v[k] != p.get(k).intValue()) throw new RuntimeException("expected equal at " + k + " " + v[k] + " " + p.get(k)); } // /** * Assess equality between an uncompressed bitmap and a compressed one, part * of a test contributed by Marc Polizzi * * @param jdkBitmap * the jdk bitmap * @param ewahBitmap * the ewah bitmap */ static void assertEqualsIterator(BitSet jdkBitmap, EWAHCompressedBitmap32 ewahBitmap) { final Vector positions = new Vector(); final Iterator bits = ewahBitmap.iterator(); while (bits.hasNext()) { final int bit = extracted(bits).intValue(); Assert.assertTrue(jdkBitmap.get(bit)); positions.add(new Integer(bit)); } for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap .nextSetBit(pos + 1)) { if (!positions.contains(new Integer(pos))) { throw new RuntimeException( "iterator: bitset got different bits"); } } } // part of a test contributed by Marc Polizzi /** * Assert equals positions. 
* * @param jdkBitmap * the jdk bitmap * @param ewahBitmap * the ewah bitmap */ static void assertEqualsPositions(BitSet jdkBitmap, EWAHCompressedBitmap32 ewahBitmap) { final List positions = ewahBitmap.getPositions(); for (int position : positions) { if (!jdkBitmap.get(position)) { throw new RuntimeException( "positions: bitset got different bits"); } } for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap .nextSetBit(pos + 1)) { if (!positions.contains(new Integer(pos))) { throw new RuntimeException( "positions: bitset got different bits"); } } // we check again final int[] fastpositions = ewahBitmap.toArray(); for (int position : fastpositions) { if (!jdkBitmap.get(position)) { throw new RuntimeException( "positions: bitset got different bits with toArray"); } } for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap .nextSetBit(pos + 1)) { int index = Arrays.binarySearch(fastpositions, pos); if (index < 0) throw new RuntimeException( "positions: bitset got different bits with toArray"); if (fastpositions[index] != pos) throw new RuntimeException( "positions: bitset got different bits with toArray"); } } /** * Assert equals positions. 
* * @param ewahBitmap1 * the ewah bitmap1 * @param ewahBitmap2 * the ewah bitmap2 */ static void assertEqualsPositions(EWAHCompressedBitmap32 ewahBitmap1, EWAHCompressedBitmap32 ewahBitmap2) { final List positions1 = ewahBitmap1.getPositions(); final List positions2 = ewahBitmap2.getPositions(); if (!positions1.equals(positions2)) throw new RuntimeException( "positions: alternative got different bits (two bitmaps)"); // final int[] fastpositions1 = ewahBitmap1.toArray(); assertEquals(fastpositions1, positions1); final int[] fastpositions2 = ewahBitmap2.toArray(); assertEquals(fastpositions2, positions2); if (!Arrays.equals(fastpositions1, fastpositions2)) throw new RuntimeException( "positions: alternative got different bits with toArray but not with getPositions (two bitmaps)"); } /** * Convenience function to assess equality between a compressed bitset and * an uncompressed bitset * * @param x * the compressed bitset/bitmap * @param y * the uncompressed bitset/bitmap */ static void equal(EWAHCompressedBitmap32 x, BitSet y) { Assert.assertEquals(x.cardinality(), y.cardinality()); for (int i : x.getPositions()) Assert.assertTrue(y.get(i)); } /** * Convenience function to assess equality between an array and an iterator * over Integers * * @param i * the iterator * @param array * the array */ static void equal(Iterator i, int[] array) { int cursor = 0; while (i.hasNext()) { int x = extracted(i).intValue(); int y = array[cursor++]; Assert.assertEquals(x, y); } } /** The Constant MEGA: a large integer. */ private static final int MEGA = 8 * 1024 * 1024; /** The Constant TEST_BS_SIZE: used to represent the size of a large bitmap. 
*/ private static final int TEST_BS_SIZE = 8 * MEGA; } IntIteratorOverIteratingRLWTest32.java000066400000000000000000000103721224043567000333030ustar00rootroot00000000000000javaewah-JavaEWAH-0.7.9/src/test/java/com/googlecode/javaewah32package com.googlecode.javaewah32; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import org.junit.Test; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Tests for utility class. Sketchy for now. * */ @SuppressWarnings("javadoc") public class IntIteratorOverIteratingRLWTest32 { @Test // had problems with bitmaps beginning with two consecutive clean runs public void testConsecClean() { System.out .println("testing int iteration, 2 consec clean runs starting with zeros"); EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); for (int i = 64; i < 128; ++i) e.set(i); IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( e.getIteratingRLW()); assertTrue(ii.hasNext()); int ctr = 0; while (ii.hasNext()) { ++ctr; ii.next(); } assertEquals(64, ctr); } @Test public void testConsecCleanStartOnes() { System.out .println("testing int iteration, 2 consec clean runs starting with ones"); EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); for (int i = 0; i < 2 * 64; ++i) e.set(i); for (int i = 4 * 64; i < 5 * 64; ++i) e.set(i); IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( e.getIteratingRLW()); assertTrue(ii.hasNext()); int ctr = 0; while (ii.hasNext()) { ++ctr; ii.next(); } assertEquals(3 * 64, ctr); } @Test public void testStartDirty() { System.out.println("testing int iteration, no initial runs"); EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); for (int i = 1; i < 2 * 64; ++i) e.set(i); for (int i = 4 * 64; i < 5 * 64; ++i) e.set(i); IntIteratorOverIteratingRLW32 ii = new 
IntIteratorOverIteratingRLW32( e.getIteratingRLW()); assertTrue(ii.hasNext()); int ctr = 0; while (ii.hasNext()) { ++ctr; ii.next(); } assertEquals(3 * 64 - 1, ctr); } @Test public void testEmpty() { System.out.println("testing int iteration over empty bitmap"); EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( e.getIteratingRLW()); assertFalse(ii.hasNext()); } @Test public void testRandomish() { EWAHCompressedBitmap32 e = new EWAHCompressedBitmap32(); int upperlimit = 100000; for (int i = 0; i < upperlimit; ++i) { double probabilityOfOne = i / (double) (upperlimit / 2); if (probabilityOfOne > 1.0) probabilityOfOne = 1.0; if (Math.random() < probabilityOfOne) { e.set(i); } } IntIteratorOverIteratingRLW32 ii = new IntIteratorOverIteratingRLW32( e.getIteratingRLW()); int ctr = 0; while (ii.hasNext()) { ++ctr; ii.next(); } assertEquals(e.cardinality(), ctr); System.out .println("checking int iteration over a var density bitset of size " + e.cardinality()); } } javaewah-JavaEWAH-0.7.9/src/test/java/com/googlecode/javaewah32/IteratorAggregationTest32.java000066400000000000000000000075041224043567000317720ustar00rootroot00000000000000package com.googlecode.javaewah32; import static org.junit.Assert.*; import java.util.Iterator; import org.junit.Test; import com.googlecode.javaewah.benchmark.ClusteredDataGenerator; /* * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser * Licensed under the Apache License, Version 2.0. */ /** * Tests specifically for iterators. 
* */ public class IteratorAggregationTest32 { /** * @param N number of bitmaps to generate in each set * @param nbr parameter determining the size of the arrays (in a log scale) * @return an iterator over sets of bitmaps */ public static Iterator getCollections(final int N, final int nbr) { final ClusteredDataGenerator cdg = new ClusteredDataGenerator(123); return new Iterator() { int sparsity = 1; @Override public boolean hasNext() { return this.sparsity < 5; } @Override public EWAHCompressedBitmap32[] next() { int[][] data = new int[N][]; int Max = (1 << (nbr + this.sparsity)); for (int k = 0; k < N; ++k) data[k] = cdg.generateClustered(1 << nbr, Max); EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; for (int k = 0; k < N; ++k) { ewah[k] = new EWAHCompressedBitmap32(); for (int x = 0; x < data[k].length; ++x) { ewah[k].set(data[k][x]); } data[k] = null; } this.sparsity += 3; return ewah; } @Override public void remove() { // unimplemented } }; } /** * */ @Test public void testAnd() { for (int N = 1; N < 10; ++N) { System.out.println("testAnd N = " + N); Iterator i = getCollections(N,3); while (i.hasNext()) { EWAHCompressedBitmap32[] x = i.next(); EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32.and(x); EWAHCompressedBitmap32 x1 = IteratorUtil32 .materialize(IteratorAggregation32.bufferedand(IteratorUtil32 .toIterators(x))); assertTrue(x1.equals(tanswer)); } System.gc(); } } /** * */ @Test public void testOr() { for (int N = 1; N < 10; ++N) { System.out.println("testOr N = " + N); Iterator i = getCollections(N,3); while (i.hasNext()) { EWAHCompressedBitmap32[] x = i.next(); EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32.or(x); EWAHCompressedBitmap32 x1 = IteratorUtil32 .materialize(IteratorAggregation32.bufferedor(IteratorUtil32 .toIterators(x))); assertTrue(x1.equals(tanswer)); } System.gc(); } } /** * */ @SuppressWarnings("deprecation") @Test public void testWideOr() { for (int nbr = 3; nbr <= 24; nbr += 3) { for (int N = 100; N < 
1000; N += 100) { System.out.println("testWideOr N = " + N); Iterator i = getCollections(N, 3); while (i.hasNext()) { EWAHCompressedBitmap32[] x = i.next(); EWAHCompressedBitmap32 tanswer = EWAHCompressedBitmap32.or(x); EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); FastAggregation32.legacy_orWithContainer(container, x); assertTrue(container.equals(tanswer)); EWAHCompressedBitmap32 x1 = IteratorUtil32 .materialize(IteratorAggregation32 .bufferedor(IteratorUtil32.toIterators(x))); assertTrue(x1.equals(tanswer)); } System.gc(); } } } /** * */ @Test public void testXor() { System.out.println("testXor "); Iterator i = getCollections(2,3); while (i.hasNext()) { EWAHCompressedBitmap32[] x = i.next(); EWAHCompressedBitmap32 tanswer = x[0].xor(x[1]); EWAHCompressedBitmap32 x1 = IteratorUtil32 .materialize(IteratorAggregation32.bufferedxor( x[0].getIteratingRLW(), x[1].getIteratingRLW())); assertTrue(x1.equals(tanswer)); } System.gc(); } }