clone() throws CloneNotSupportedException;
} javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java 0000664 0000000 0000000 00000152730 12240435670 0030534 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
import java.util.*;
import java.io.*;
/**
*
* This implements the patent-free(1) EWAH scheme. Roughly speaking, it is a
* 64-bit variant of the BBC compression scheme used by Oracle for its bitmap
* indexes.
*
*
*
* The objective of this compression type is to provide some compression, while
* reducing as much as possible the CPU cycle usage.
*
*
*
*
* This implementation being 64-bit, it assumes a 64-bit CPU together with a
* 64-bit Java Virtual Machine. This same code on a 32-bit machine may not be as
* fast.
*
*
*
* There is also a 32-bit version of this code in the class
* javaewah32.EWAHCompressedBitmap32
*
*
* @see com.googlecode.javaewah32.EWAHCompressedBitmap32
*
*
* For more details, see the following paper:
*
*
*
* - Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves
* word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages
* 3-28, 2010. http://arxiv.org/abs/0901.3751
*
*
*
* A 32-bit version of the compressed format was described by Wu et al. and
* named WBC:
*
*
*
* - K. Wu, E. J. Otoo, A. Shoshani, H. Nordberg, Notes on design and
* implementation of compressed bit vectors, Tech. Rep. LBNL/PUB-3161,
* Lawrence Berkeley National Laboratory, available from http://crd.lbl.
* gov/~kewu/ps/PUB-3161.html (2001).
*
*
*
* Probably, the best prior art is the Oracle bitmap compression scheme
* (BBC):
*
*
* - G. Antoshenkov, Byte-Aligned Bitmap Compression, DCC'95, 1995.
*
*
*
* 1- The authors do not know of any patent infringed by the following
* implementation. However, similar schemes, like WAH are covered by
* patents.
*
*
* @since 0.1.0
*/
public final class EWAHCompressedBitmap implements Cloneable, Externalizable,
Iterable, BitmapStorage, LogicalElement {
/**
* Creates an empty bitmap (no bit set to true).
*/
public EWAHCompressedBitmap() {
this.buffer = new long[defaultbuffersize];
this.rlw = new RunningLengthWord(this, 0);
}
/**
 * Creates an empty bitmap with an explicit initial buffer size (in 64-bit
 * words). The initial memory usage will be "buffersize * 64" bits. For large
 * poorly compressible bitmaps, using large values may improve performance.
 *
 * A request for zero words is rounded up to one word: the first
 * running-length marker is kept at index 0 of the buffer, so an empty array
 * would make the very first write fail with an
 * ArrayIndexOutOfBoundsException.
 *
 * @param buffersize
 *          number of 64-bit words reserved when the object is created
 * @throws NegativeArraySizeException
 *           if buffersize is negative
 */
public EWAHCompressedBitmap(final int buffersize) {
  // Keep at least one word so that the marker at position 0 has storage.
  this.buffer = new long[buffersize == 0 ? 1 : buffersize];
  this.rlw = new RunningLengthWord(this, 0);
}
/**
 * Appends one full word to the bitmap (for expert use).
 *
 * This is normally how you add data to the array: bits are appended in
 * streams of 8*8 bits.
 *
 * Example: if you add 321, you have added (in binary notation)
 * 0b101000001, so you have effectively called set(0), set(6), set(8)
 * in sequence.
 *
 * @param newdata
 *          the word
 */
@Override
public void add(final long newdata) {
  // A plain add always counts every bit of the word as significant.
  this.add(newdata, wordinbits);
}
/**
 * Appends one word to the bitmap (for expert use), counting only part of it
 * toward the size in bits.
 *
 * @param newdata
 *          the word
 * @param bitsthatmatter
 *          the number of significant bits (by default it should be 64)
 */
public void add(final long newdata, final int bitsthatmatter) {
  this.sizeinbits += bitsthatmatter;
  // All-zero and all-one words are stored as runs rather than literals.
  if (newdata == 0L) {
    addEmptyWord(false);
    return;
  }
  if (newdata == -1L) {
    addEmptyWord(true);
    return;
  }
  addLiteralWord(newdata);
}
/**
 * For internal use: appends one clean word (all zeroes or all ones).
 *
 * @param v
 *          the boolean value of every bit in the word
 */
private void addEmptyWord(final boolean v) {
  final boolean markerHasNoLiterals = this.rlw.getNumberOfLiteralWords() == 0;
  final long currentRun = this.rlw.getRunningLength();
  if (markerHasNoLiterals) {
    // A marker with no run and no literals can adopt the requested bit value.
    if (currentRun == 0) {
      this.rlw.setRunningBit(v);
    }
    // Extend the current run when it has the right value and room to grow.
    if (this.rlw.getRunningBit() == v
        && currentRun < RunningLengthWord.largestrunninglengthcount) {
      this.rlw.setRunningLength(currentRun + 1);
      return;
    }
  }
  // Otherwise open a fresh marker word holding a run of length one.
  push_back(0);
  this.rlw.position = this.actualsizeinwords - 1;
  this.rlw.setRunningBit(v);
  this.rlw.setRunningLength(1);
}
/**
 * For internal use: appends one literal (dirty) word after the current
 * marker word.
 *
 * When the current marker already counts the largest number of literal words
 * it can represent, a new marker is started and the word becomes its first
 * literal. The two cases are mutually exclusive: the word is stored exactly
 * once. (Previously the overflow case fell through into the normal case, so
 * the word was stored twice and the fresh marker's literal count was set
 * past the maximum.)
 *
 * @param newdata
 *          the literal word
 */
private void addLiteralWord(final long newdata) {
  final int numbersofar = this.rlw.getNumberOfLiteralWords();
  if (numbersofar >= RunningLengthWord.largestliteralcount) {
    // Current marker is full: open a new one that owns this single literal.
    push_back(0);
    this.rlw.position = this.actualsizeinwords - 1;
    this.rlw.setNumberOfLiteralWords(1);
    push_back(newdata);
    return;
  }
  this.rlw.setNumberOfLiteralWords(numbersofar + 1);
  push_back(newdata);
}
/**
 * If you have several literal words to copy over, this might be faster than
 * adding them one at a time.
 *
 * The words data[start], ..., data[start + number - 1] are appended in
 * order. When the current marker word cannot count any more literals, a new
 * marker is opened and copying resumes from where it stopped in the source
 * array. (Previously every chunk was copied from {@code start} again, so
 * the words after a marker roll-over were wrong.)
 *
 * @param data
 *          the literal words
 * @param start
 *          the starting point in the array
 * @param number
 *          the number of literal words to add
 */
@Override
public void addStreamOfLiteralWords(final long[] data, final int start,
    final int number) {
  int offset = start; // next source index to copy from
  int leftovernumber = number;
  while (leftovernumber > 0) {
    final int literalsSoFar = this.rlw.getNumberOfLiteralWords();
    // Copy as much as the current marker can still account for.
    final int whatwecanadd = Math.min(leftovernumber,
        RunningLengthWord.largestliteralcount - literalsSoFar);
    this.rlw.setNumberOfLiteralWords(literalsSoFar + whatwecanadd);
    push_back(data, offset, whatwecanadd);
    this.sizeinbits += whatwecanadd * wordinbits;
    offset += whatwecanadd;
    leftovernumber -= whatwecanadd;
    if (leftovernumber > 0) {
      // Marker is full: start a new one for the remaining words.
      push_back(0);
      this.rlw.position = this.actualsizeinwords - 1;
    }
  }
}
/**
 * For experts: You want to add many zeroes or ones? This is the method you
 * use.
 *
 * The size in bits grows by {@code number} full words; the run itself is
 * appended by {@code fastaddStreamOfEmptyWords}, which holds the single
 * copy of the run-appending logic (this method used to duplicate it line
 * for line).
 *
 * @param v
 *          the boolean value
 * @param number
 *          the number of clean words to append; nothing happens when it is 0
 */
@Override
public void addStreamOfEmptyWords(final boolean v, long number) {
  if (number == 0)
    return;
  this.sizeinbits += number * wordinbits;
  fastaddStreamOfEmptyWords(v, number);
}
/**
 * Same as addStreamOfLiteralWords, but the words are negated (bitwise
 * complement) before being stored.
 *
 * The words data[start], ..., data[start + number - 1] are consumed in
 * order. When the current marker word cannot count any more literals, a new
 * marker is opened and copying resumes from where it stopped in the source
 * array. (Previously every chunk was read from {@code start} again, so the
 * words after a marker roll-over were wrong.)
 *
 * @param data
 *          the literal words
 * @param start
 *          the starting point in the array
 * @param number
 *          the number of literal words to add
 */
@Override
public void addStreamOfNegatedLiteralWords(final long[] data,
    final int start, final int number) {
  int offset = start; // next source index to read from
  int leftovernumber = number;
  while (leftovernumber > 0) {
    final int literalsSoFar = this.rlw.getNumberOfLiteralWords();
    // Copy as much as the current marker can still account for.
    final int whatwecanadd = Math.min(leftovernumber,
        RunningLengthWord.largestliteralcount - literalsSoFar);
    this.rlw.setNumberOfLiteralWords(literalsSoFar + whatwecanadd);
    negative_push_back(data, offset, whatwecanadd);
    this.sizeinbits += whatwecanadd * wordinbits;
    offset += whatwecanadd;
    leftovernumber -= whatwecanadd;
    if (leftovernumber > 0) {
      // Marker is full: start a new one for the remaining words.
      push_back(0);
      this.rlw.position = this.actualsizeinwords - 1;
    }
  }
}
/**
 * Computes the bitwise AND of this bitmap with another one and returns the
 * result as a new compressed bitmap.
 *
 * The running time is proportional to the sum of the compressed sizes (as
 * reported by sizeInBytes()).
 *
 * If you are not planning on adding to the resulting bitmap, you may call the trim()
 * method to reduce memory usage.
 *
 * @since 0.4.3
 * @param a
 *          the other bitmap
 * @return the EWAH compressed bitmap
 */
@Override
public EWAHCompressedBitmap and(final EWAHCompressedBitmap a) {
  final EWAHCompressedBitmap result = new EWAHCompressedBitmap();
  // Pre-size the result buffer to the larger of the two compressed inputs.
  result.reserve(Math.max(this.actualsizeinwords, a.actualsizeinwords));
  andToContainer(a, result);
  return result;
}
/**
 * Computes the bitwise AND of this bitmap with another one and writes the
 * result into the given container.
 *
 * The running time is proportional to the sum of the compressed sizes (as
 * reported by sizeInBytes()).
 *
 * @since 0.4.0
 * @param a
 *          the other bitmap
 * @param container
 *          where we store the result
 */
public void andToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) {
  final EWAHIterator i = a.getEWAHIterator();
  final EWAHIterator j = getEWAHIterator();
  final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i);
  final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j);
  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
    // First consume runs: the side with the longer run is the "predator".
    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
      final IteratingBufferedRunningLengthWord prey;
      final IteratingBufferedRunningLengthWord predator;
      if (rlwi.getRunningLength() < rlwj.getRunningLength()) {
        prey = rlwi;
        predator = rlwj;
      } else {
        prey = rlwj;
        predator = rlwi;
      }
      if (predator.getRunningBit()) {
        // Run of ones: the prey's words pass through; pad with zeroes if short.
        final long index = prey.discharge(container, predator.getRunningLength());
        container.addStreamOfEmptyWords(false, predator.getRunningLength() - index);
      } else {
        // Run of zeroes: the output is zero whatever the prey contains.
        container.addStreamOfEmptyWords(false, predator.getRunningLength());
        prey.discardFirstWords(predator.getRunningLength());
      }
      predator.discardFirstWords(predator.getRunningLength());
    }
    // Then combine the literal words both sides have available.
    final int commonLiterals = Math.min(rlwi.getNumberOfLiteralWords(),
        rlwj.getNumberOfLiteralWords());
    if (commonLiterals > 0) {
      for (int w = 0; w < commonLiterals; ++w) {
        container.add(rlwi.getLiteralWordAt(w) & rlwj.getLiteralWordAt(w));
      }
      rlwi.discardFirstWords(commonLiterals);
      rlwj.discardFirstWords(commonLiterals);
    }
  }
  if (adjustContainerSizeWhenAggregating) {
    // Whatever is left on the longer input is written out as empty words.
    final IteratingBufferedRunningLengthWord remaining = (rlwi.size() > 0) ? rlwi : rlwj;
    remaining.dischargeAsEmpty(container);
    container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits()));
  }
}
/**
 * Returns the cardinality of the result of a bitwise AND of the values of the
 * current bitmap with some other bitmap. Avoids needing to allocate an
 * intermediate bitmap to hold the result of the AND.
 *
 * @since 0.4.0
 * @param a
 *          the other bitmap
 * @return the cardinality
 */
public int andCardinality(final EWAHCompressedBitmap a) {
  // The counter is passed as the container, so no result bitmap is built.
  final BitCounter bitCounter = new BitCounter();
  this.andToContainer(a, bitCounter);
  final int count = bitCounter.getCount();
  return count;
}
/**
 * Computes the bitwise AND NOT of this bitmap with another one and returns
 * the result as a new compressed bitmap.
 *
 * The running time is proportional to the sum of the compressed sizes (as
 * reported by sizeInBytes()).
 *
 * If you are not planning on adding to the resulting bitmap, you may call the trim()
 * method to reduce memory usage.
 *
 * @param a
 *          the other bitmap
 * @return the EWAH compressed bitmap
 */
@Override
public EWAHCompressedBitmap andNot(final EWAHCompressedBitmap a) {
  final EWAHCompressedBitmap result = new EWAHCompressedBitmap();
  // Pre-size the result buffer to the larger of the two compressed inputs.
  result.reserve(Math.max(this.actualsizeinwords, a.actualsizeinwords));
  andNotToContainer(a, result);
  return result;
}
/**
 * Computes the bitwise AND NOT of the current bitmap with some other bitmap
 * and writes the result into the given container. This method is expected to
 * be faster than doing A.and(B.clone().not()).
 *
 * The running time is proportional to the sum of the compressed sizes (as
 * reported by sizeInBytes()).
 *
 * @since 0.4.0
 * @param a the other bitmap
 * @param container where to store the result
 */
public void andNotToContainer(final EWAHCompressedBitmap a,
    final BitmapStorage container) {
  final EWAHIterator i = getEWAHIterator();
  final EWAHIterator j = a.getEWAHIterator();
  final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i);
  final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j);
  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
    // First consume runs: the side with the longer run is the "predator".
    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
      final boolean iIsPrey = rlwi.getRunningLength() < rlwj.getRunningLength();
      final IteratingBufferedRunningLengthWord prey;
      final IteratingBufferedRunningLengthWord predator;
      if (iIsPrey) {
        prey = rlwi;
        predator = rlwj;
      } else {
        prey = rlwj;
        predator = rlwi;
      }
      if (predator.getRunningBit() == iIsPrey) {
        // Either "a" is a run of ones or "this" is a run of zeroes: output zeroes.
        container.addStreamOfEmptyWords(false, predator.getRunningLength());
        prey.discardFirstWords(predator.getRunningLength());
      } else if (iIsPrey) {
        // "a" is a run of zeroes: the words of "this" pass through unchanged.
        final long index = prey.discharge(container, predator.getRunningLength());
        container.addStreamOfEmptyWords(false, predator.getRunningLength() - index);
      } else {
        // "this" is a run of ones: the output is the complement of "a".
        final long index = prey.dischargeNegated(container, predator.getRunningLength());
        container.addStreamOfEmptyWords(true, predator.getRunningLength() - index);
      }
      predator.discardFirstWords(predator.getRunningLength());
    }
    // Then combine the literal words both sides have available.
    final int commonLiterals = Math.min(rlwi.getNumberOfLiteralWords(),
        rlwj.getNumberOfLiteralWords());
    if (commonLiterals > 0) {
      for (int w = 0; w < commonLiterals; ++w) {
        container.add(rlwi.getLiteralWordAt(w) & (~rlwj.getLiteralWordAt(w)));
      }
      rlwi.discardFirstWords(commonLiterals);
      rlwj.discardFirstWords(commonLiterals);
    }
  }
  if (rlwi.size() > 0) {
    // Leftover words of "this" have nothing to be masked by: copy them over.
    rlwi.discharge(container);
  } else if (adjustContainerSizeWhenAggregating) {
    // Leftover words of "a" can only clear bits: they contribute empty words.
    rlwj.dischargeAsEmpty(container);
  }
  if (adjustContainerSizeWhenAggregating) {
    container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits()));
  }
}
/**
 * Returns the cardinality of the result of a bitwise AND NOT of the values of
 * the current bitmap with some other bitmap. Avoids needing to allocate an
 * intermediate bitmap to hold the result of the AND NOT.
 *
 * @since 0.4.0
 * @param a
 *          the other bitmap
 * @return the cardinality
 */
public int andNotCardinality(final EWAHCompressedBitmap a) {
  // The counter is passed as the container, so no result bitmap is built.
  final BitCounter bitCounter = new BitCounter();
  this.andNotToContainer(a, bitCounter);
  final int count = bitCounter.getCount();
  return count;
}
/**
 * Reports the number of bits set to true. Running time is proportional to
 * compressed size (as reported by sizeInBytes).
 *
 * @return the number of bits set to true
 */
public int cardinality() {
  int total = 0;
  final EWAHIterator it = new EWAHIterator(this, this.actualsizeinwords);
  while (it.hasNext()) {
    final RunningLengthWord marker = it.next();
    // A run of ones contributes a full word of set bits per run word.
    if (marker.getRunningBit()) {
      total += wordinbits * marker.getRunningLength();
    }
    // Literal words are counted bit by bit.
    int k = 0;
    while (k < marker.getNumberOfLiteralWords()) {
      total += Long.bitCount(it.buffer()[it.literalWords() + k]);
      ++k;
    }
  }
  return total;
}
/**
 * Clears any set bits and sets the size in bits back to 0.
 */
public void clear() {
  // Only the first word is reset; the rest of the buffer keeps stale data,
  // which any new set operations should overwrite.
  this.buffer[0] = 0;
  this.rlw.position = 0;
  this.actualsizeinwords = 1;
  this.sizeinbits = 0;
}
/*
 * Returns a copy with its own word buffer and its own marker, so the copy
 * can be modified independently of this bitmap.
 *
 * @see java.lang.Object#clone()
 */
@Override
public EWAHCompressedBitmap clone() throws java.lang.CloneNotSupportedException {
  final EWAHCompressedBitmap copy = (EWAHCompressedBitmap) super.clone();
  copy.sizeinbits = this.sizeinbits;
  copy.actualsizeinwords = this.actualsizeinwords;
  // The array must be duplicated: super.clone() only copies the reference.
  copy.buffer = this.buffer.clone();
  // The marker is rebuilt so that it refers to the copy, at the same position.
  copy.rlw = new RunningLengthWord(copy, this.rlw.position);
  return copy;
}
/**
 * Deserialize. Reads, in this order: the size in bits (int), the number of
 * words (int), that many words (long each), and the position of the current
 * marker word (int).
 *
 * @param in
 *          the DataInput stream
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
public void deserialize(DataInput in) throws IOException {
  this.sizeinbits = in.readInt();
  this.actualsizeinwords = in.readInt();
  // The existing buffer is reused when it is already large enough.
  if (this.actualsizeinwords > this.buffer.length) {
    this.buffer = new long[this.actualsizeinwords];
  }
  int index = 0;
  while (index < this.actualsizeinwords) {
    this.buffer[index] = in.readLong();
    ++index;
  }
  final int markerPosition = in.readInt();
  this.rlw = new RunningLengthWord(this, markerPosition);
}
/**
 * Check to see whether the two compressed bitmaps contain the same set bits.
 *
 * @see java.lang.Object#equals(java.lang.Object)
 */
@Override
public boolean equals(Object o) {
  if (!(o instanceof EWAHCompressedBitmap)) {
    return false;
  }
  try {
    // XOR into a storage that raises NonEmptyException if given a set bit.
    this.xorToContainer((EWAHCompressedBitmap) o, new NonEmptyVirtualStorage());
  } catch (NonEmptyVirtualStorage.NonEmptyException e) {
    return false;
  }
  return true;
}
/**
 * For experts: You want to add many zeroes or ones faster?
 *
 * This method does not update sizeinbits.
 *
 * @param v
 *          the boolean value
 * @param number
 *          the number (must be greater than 0)
 */
private void fastaddStreamOfEmptyWords(final boolean v, long number) {
  final boolean bitDiffers = this.rlw.getRunningBit() != v;
  if (bitDiffers && (this.rlw.size() == 0)) {
    // The current marker is empty: it can simply adopt the requested value.
    this.rlw.setRunningBit(v);
  } else if ((this.rlw.getNumberOfLiteralWords() != 0) || bitDiffers) {
    // The current marker cannot take this run: open a new one.
    push_back(0);
    this.rlw.position = this.actualsizeinwords - 1;
    if (v)
      this.rlw.setRunningBit(v);
  }
  // Top up the current marker's run as far as its counter allows.
  final long runlen = this.rlw.getRunningLength();
  final long whatwecanadd = Math.min(number,
      RunningLengthWord.largestrunninglengthcount - runlen);
  this.rlw.setRunningLength(runlen + whatwecanadd);
  number -= whatwecanadd;
  // Whatever is left goes into new markers: full ones first, then the rest.
  while (number > 0) {
    final long chunk = Math.min(number, RunningLengthWord.largestrunninglengthcount);
    push_back(0);
    this.rlw.position = this.actualsizeinwords - 1;
    if (v)
      this.rlw.setRunningBit(v);
    this.rlw.setRunningLength(chunk);
    number -= chunk;
  }
}
/**
 * Gets an EWAHIterator over the data. This is a customized iterator which
 * iterates over run length words. For experts only.
 *
 * @return the EWAHIterator
 */
public EWAHIterator getEWAHIterator() {
  final EWAHIterator wordIterator = new EWAHIterator(this, this.actualsizeinwords);
  return wordIterator;
}
/**
 * @return the IteratingRLW iterator corresponding to this bitmap
 */
public IteratingRLW getIteratingRLW() {
  final IteratingRLW view = new IteratingBufferedRunningLengthWord(this);
  return view;
}
/**
 * Gets the locations of the true values as one list (may use more memory
 * than iterator()).
 *
 * The list is typed as {@code List<Integer>}: with a raw list the trailing
 * clean-up below cannot call {@code intValue()} on the elements.
 *
 * @return the positions, in increasing order
 */
public List<Integer> getPositions() {
  final ArrayList<Integer> v = new ArrayList<Integer>();
  final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords);
  int pos = 0;
  while (i.hasNext()) {
    RunningLengthWord localrlw = i.next();
    if (localrlw.getRunningBit()) {
      // A run of ones: every position covered by the run is set.
      for (int j = 0; j < localrlw.getRunningLength(); ++j) {
        for (int c = 0; c < wordinbits; ++c)
          v.add(Integer.valueOf(pos++));
      }
    } else {
      // A run of zeroes: just skip over it.
      pos += wordinbits * localrlw.getRunningLength();
    }
    for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) {
      long data = i.buffer()[i.literalWords() + j];
      // Peel off the set bits from the lowest to the highest.
      while (data != 0) {
        final int ntz = Long.numberOfTrailingZeros(data);
        data ^= (1l << ntz);
        v.add(Integer.valueOf(ntz + pos));
      }
      pos += wordinbits;
    }
  }
  // Drop any positions at or beyond the declared size in bits.
  while ((v.size() > 0)
      && (v.get(v.size() - 1).intValue() >= this.sizeinbits))
    v.remove(v.size() - 1);
  return v;
}
/**
 * Returns a customized hash code (based on Karp-Rabin). Naturally, if the
 * bitmaps are equal, they will hash to the same value.
 *
 */
@Override
public int hashCode() {
  final int B = 31;
  int karprabin = 0;
  final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords);
  while (i.hasNext()) {
    i.next();
    // Only runs of ones contribute; each length is mixed in as two 32-bit halves.
    if (i.rlw.getRunningBit()) {
      karprabin += B * karprabin + (i.rlw.getRunningLength() & 0xFFFFFFFFL);
      karprabin += B * karprabin + (i.rlw.getRunningLength() >>> 32);
    }
    // Each literal word is likewise mixed in as two 32-bit halves.
    for (int k = 0; k < i.rlw.getNumberOfLiteralWords(); ++k) {
      final long word = this.buffer[i.literalWords() + k];
      karprabin += B * karprabin + (word & 0xFFFFFFFFL);
      karprabin += B * karprabin + (word >>> 32);
    }
  }
  return karprabin;
}
/**
 * Return true if the two EWAHCompressedBitmap have both at least one true bit
 * in the same position. Equivalently, you could call "and" and check whether
 * there is a set bit, but intersects will run faster if you don't need the
 * result of the "and" operation.
 *
 * @since 0.3.2
 * @param a
 *          the other bitmap
 * @return whether they intersect
 */
public boolean intersects(final EWAHCompressedBitmap a) {
  boolean sharedBitFound = false;
  try {
    // The storage raises NonEmptyException once a set bit reaches it.
    this.andToContainer(a, new NonEmptyVirtualStorage());
  } catch (NonEmptyVirtualStorage.NonEmptyException nee) {
    sharedBitFound = true;
  }
  return sharedBitFound;
}
/**
 * Iterator over the set bits (this is what most people will want to use to
 * browse the content if they want an iterator). The location of the set bits
 * is returned, in increasing order.
 *
 * @return the int iterator
 */
public IntIterator intIterator() {
  final EWAHIterator words = new EWAHIterator(this, this.actualsizeinwords);
  return new IntIteratorImpl(words);
}
/**
 * Iterates over the positions of the true values. This is similar to
 * intIterator(), but it uses Java generics: positions are returned as
 * boxed {@code Integer} values, and the iterator is typed accordingly
 * instead of being a raw {@code Iterator}.
 *
 * @return the iterator; its remove() method always throws
 *         UnsupportedOperationException
 */
@Override
public Iterator<Integer> iterator() {
  return new Iterator<Integer>() {
    // Primitive iterator doing the actual work.
    final private IntIterator under = intIterator();

    @Override
    public boolean hasNext() {
      return this.under.hasNext();
    }

    @Override
    public Integer next() {
      // valueOf may reuse cached boxes instead of always allocating.
      return Integer.valueOf(this.under.next());
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException("bitsets do not support remove");
    }
  };
}
/**
 * For internal use: appends the bitwise complement of a range of words to
 * the buffer, growing the buffer first when needed.
 *
 * @param data
 *          the array of words to be added
 * @param start
 *          the starting point
 * @param number
 *          the number of words to add
 */
private void negative_push_back(final long[] data, final int start,
    final int number) {
  while (this.actualsizeinwords + number >= this.buffer.length) {
    final int needed = this.actualsizeinwords + number;
    final int grown;
    if (needed < 32768) {
      grown = needed * 2; // small buffers double
    } else if (needed * 3 / 2 < needed) {
      grown = Integer.MAX_VALUE; // overflow
    } else {
      grown = needed * 3 / 2; // large buffers grow by half
    }
    final long[] previous = this.buffer;
    this.buffer = new long[grown];
    System.arraycopy(previous, 0, this.buffer, 0, previous.length);
    this.rlw.parent.buffer = this.buffer;
  }
  final int base = this.actualsizeinwords;
  int k = 0;
  while (k < number) {
    this.buffer[base + k] = ~data[start + k];
    ++k;
  }
  this.actualsizeinwords = base + number;
}
/**
 * Negate (bitwise) the current bitmap. To get a negated copy, do
 * EWAHCompressedBitmap x= ((EWAHCompressedBitmap) mybitmap.clone()); x.not();
 *
 * The running time is proportional to the compressed size (as reported by
 * sizeInBytes()).
 *
 */
@Override
public void not() {
  final EWAHIterator it = new EWAHIterator(this, this.actualsizeinwords);
  if (!it.hasNext())
    return;
  RunningLengthWord last;
  do {
    // Flip the run value, then complement every literal word of this marker.
    last = it.next();
    last.setRunningBit(!last.getRunningBit());
    for (int k = 0; k < last.getNumberOfLiteralWords(); ++k) {
      it.buffer()[it.literalWords() + k] = ~it.buffer()[it.literalWords() + k];
    }
  } while (it.hasNext());
  // The last word may be only partially used: its unused high bits must
  // stay at zero after the negation.
  final int usedbitsinlast = this.sizeinbits % wordinbits;
  if (usedbitsinlast == 0)
    return;
  if (last.getNumberOfLiteralWords() != 0) {
    it.buffer()[it.literalWords() + last.getNumberOfLiteralWords() - 1] &= ((~0l) >>> (wordinbits - usedbitsinlast));
    return;
  }
  // The bitmap ends in a run: if it is now a run of ones, the final word of
  // the run is replaced by a literal holding only the used bits.
  if ((last.getRunningLength() > 0) && (last.getRunningBit())) {
    last.setRunningLength(last.getRunningLength() - 1);
    this.addLiteralWord((~0l) >>> (wordinbits - usedbitsinlast));
  }
}
/**
 * Computes the bitwise OR of this bitmap with another one and returns the
 * result as a new compressed bitmap.
 *
 * The running time is proportional to the sum of the compressed sizes (as
 * reported by sizeInBytes()).
 *
 * If you are not planning on adding to the resulting bitmap, you may call the trim()
 * method to reduce memory usage.
 *
 * @param a
 *          the other bitmap
 * @return the EWAH compressed bitmap
 */
@Override
public EWAHCompressedBitmap or(final EWAHCompressedBitmap a) {
  final EWAHCompressedBitmap result = new EWAHCompressedBitmap();
  // Pre-size the result buffer to the combined compressed size of the inputs.
  final int combinedWords = this.actualsizeinwords + a.actualsizeinwords;
  result.reserve(combinedWords);
  orToContainer(a, result);
  return result;
}
/**
 * Computes the bitwise or between the current bitmap and the bitmap "a".
 * Stores the result in the container.
 *
 * @since 0.4.0
 * @param a
 *          the other bitmap
 * @param container
 *          where we store the result
 */
public void orToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) {
  final EWAHIterator i = a.getEWAHIterator();
  final EWAHIterator j = getEWAHIterator();
  final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i);
  final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j);
  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
    // First consume runs: the side with the longer run is the "predator".
    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
      final IteratingBufferedRunningLengthWord prey;
      final IteratingBufferedRunningLengthWord predator;
      if (rlwi.getRunningLength() < rlwj.getRunningLength()) {
        prey = rlwi;
        predator = rlwj;
      } else {
        prey = rlwj;
        predator = rlwi;
      }
      if (predator.getRunningBit()) {
        // Run of ones: the output is all ones whatever the prey contains.
        container.addStreamOfEmptyWords(true, predator.getRunningLength());
        prey.discardFirstWords(predator.getRunningLength());
      } else {
        // Run of zeroes: the prey's words pass through; pad with zeroes if short.
        final long index = prey.discharge(container, predator.getRunningLength());
        container.addStreamOfEmptyWords(false, predator.getRunningLength() - index);
      }
      predator.discardFirstWords(predator.getRunningLength());
    }
    // Then combine the literal words both sides have available.
    final int commonLiterals = Math.min(rlwi.getNumberOfLiteralWords(),
        rlwj.getNumberOfLiteralWords());
    if (commonLiterals > 0) {
      for (int w = 0; w < commonLiterals; ++w) {
        container.add(rlwi.getLiteralWordAt(w) | rlwj.getLiteralWordAt(w));
      }
      rlwi.discardFirstWords(commonLiterals);
      rlwj.discardFirstWords(commonLiterals);
    }
  }
  // Whatever is left on the longer input is copied over as is.
  final IteratingBufferedRunningLengthWord remaining = (rlwi.size() > 0) ? rlwi : rlwj;
  remaining.discharge(container);
  container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits()));
}
/**
 * Returns the cardinality of the result of a bitwise OR of the values of the
 * current bitmap with some other bitmap. Avoids needing to allocate an
 * intermediate bitmap to hold the result of the OR.
 *
 * @since 0.4.0
 * @param a
 *          the other bitmap
 * @return the cardinality
 */
public int orCardinality(final EWAHCompressedBitmap a) {
  // The counter is passed as the container, so no result bitmap is built.
  final BitCounter bitCounter = new BitCounter();
  this.orToContainer(a, bitCounter);
  final int count = bitCounter.getCount();
  return count;
}
/**
 * For internal use: appends one word to the buffer, growing the buffer first
 * when it is full.
 *
 * @param data
 *          the word to be added
 */
private void push_back(final long data) {
  final int capacity = this.buffer.length;
  if (this.actualsizeinwords == capacity) {
    final int grown;
    if (capacity < 32768) {
      grown = capacity * 2; // small buffers double
    } else if (capacity * 3 / 2 < capacity) {
      grown = Integer.MAX_VALUE; // overflow
    } else {
      grown = capacity * 3 / 2; // large buffers grow by half
    }
    final long[] previous = this.buffer;
    this.buffer = new long[grown];
    System.arraycopy(previous, 0, this.buffer, 0, capacity);
    this.rlw.parent.buffer = this.buffer;
  }
  this.buffer[this.actualsizeinwords] = data;
  this.actualsizeinwords++;
}
/**
 * For internal use: appends a range of words to the buffer, growing the
 * buffer first when needed.
 *
 * @param data
 *          the array of words to be added
 * @param start
 *          the starting point
 * @param number
 *          the number of words to add
 */
private void push_back(final long[] data, final int start, final int number) {
  final int needed = this.actualsizeinwords + number;
  if (needed >= this.buffer.length) {
    final int grown;
    if (needed < 32768) {
      grown = needed * 2; // small buffers double
    } else if (needed * 3 / 2 < needed) {
      grown = Integer.MAX_VALUE; // overflow
    } else {
      grown = needed * 3 / 2; // large buffers grow by half
    }
    final long[] previous = this.buffer;
    this.buffer = new long[grown];
    System.arraycopy(previous, 0, this.buffer, 0, previous.length);
    this.rlw.parent.buffer = this.buffer;
  }
  System.arraycopy(data, start, this.buffer, this.actualsizeinwords, number);
  this.actualsizeinwords += number;
}
/*
 * Restores the bitmap from the stream by delegating to deserialize.
 *
 * @see java.io.Externalizable#readExternal(java.io.ObjectInput)
 */
@Override
public void readExternal(ObjectInput in) throws IOException {
  this.deserialize(in);
}
/**
 * For internal use (trading off memory for speed): makes sure the buffer can
 * hold at least the given number of words.
 *
 * @param size
 *          the number of words to allocate
 * @return True if the buffer was enlarged, false if it was already large
 *         enough.
 */
private boolean reserve(final int size) {
  if (size <= this.buffer.length) {
    return false;
  }
  final long[] previous = this.buffer;
  final long[] enlarged = new long[size];
  System.arraycopy(previous, 0, enlarged, 0, previous.length);
  this.buffer = enlarged;
  this.rlw.parent.buffer = this.buffer;
  return true;
}
/**
 * Serialize. Writes, in this order: the size in bits (int), the number of
 * words (int), that many words (long each), and the position of the current
 * marker word (int).
 *
 * @param out
 *          the DataOutput stream
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
public void serialize(DataOutput out) throws IOException {
  out.writeInt(this.sizeinbits);
  out.writeInt(this.actualsizeinwords);
  int index = 0;
  while (index < this.actualsizeinwords) {
    out.writeLong(this.buffer[index]);
    ++index;
  }
  out.writeInt(this.rlw.position);
}
/**
 * Report the size required to serialize this bitmap: the compressed words
 * plus three 4-byte integers.
 *
 * @return the size in bytes
 */
public int serializedSizeInBytes() {
  final int headerBytes = 3 * 4;
  return this.sizeInBytes() + headerBytes;
}
/**
 * Query the value of a single bit. Relying on this method when speed is
 * needed is discouraged. The complexity is linear with the size of the
 * bitmap.
 *
 * (This implementation is based on zhenjl's Go version of JavaEWAH.)
 *
 * @param i
 *          the bit we are interested in
 * @return whether the bit is set to true; false when i is negative or not
 *         smaller than the size in bits
 */
public boolean get(final int i) {
  if (i < 0 || i >= this.sizeinbits) {
    return false;
  }
  final IteratingRLW cursor = getIteratingRLW();
  final int targetWord = i / wordinbits;
  int wordsSeen = 0;
  while (wordsSeen <= targetWord) {
    // Does the target fall inside the run part of this marker?
    wordsSeen += cursor.getRunningLength();
    if (targetWord < wordsSeen) {
      return cursor.getRunningBit();
    }
    // Does it fall inside the literal words that follow the run?
    if (targetWord < wordsSeen + cursor.getNumberOfLiteralWords()) {
      final long literal = cursor.getLiteralWordAt(targetWord - wordsSeen);
      // A long shift only uses the low six bits of i, i.e. the bit offset
      // within the word.
      return (literal & (1l << i)) != 0;
    }
    wordsSeen += cursor.getNumberOfLiteralWords();
    cursor.next();
  }
  return false;
}
/**
 * Set the bit at position i to true, the bits must be set in (strictly) increasing
 * order. For example, set(15) and then set(7) will fail. You must do set(7)
 * and then set(15).
 *
 * @param i
 *          the index
 * @return true if the value was set (always true when i greater or equal to sizeInBits()).
 * @throws IndexOutOfBoundsException
 *           if i is negative or greater than Integer.MAX_VALUE - 64
 */
public boolean set(final int i) {
  if ((i < 0) || (i > Integer.MAX_VALUE - wordinbits))
    throw new IndexOutOfBoundsException("Set values should be between 0 and "
        + (Integer.MAX_VALUE - wordinbits));
  if (i < this.sizeinbits)
    return false;
  // Distance in words between the word holding bit i and the current end.
  final int dist = (i + wordinbits) / wordinbits
      - (this.sizeinbits + wordinbits - 1) / wordinbits;
  this.sizeinbits = i + 1;
  final long bit = 1l << (i % wordinbits);
  if (dist > 0) {
    // Easy case: the bit lands in a new word; pad with zero words if needed.
    if (dist > 1)
      fastaddStreamOfEmptyWords(false, dist - 1);
    addLiteralWord(bit);
    return true;
  }
  if (this.rlw.getNumberOfLiteralWords() == 0) {
    // The last word is part of a run: take one word off the run and
    // re-add it as a literal carrying the bit.
    this.rlw.setRunningLength(this.rlw.getRunningLength() - 1);
    addLiteralWord(bit);
    return true;
  }
  // The last word is a literal: set the bit in place.
  final int lastIndex = this.actualsizeinwords - 1;
  this.buffer[lastIndex] |= bit;
  if (this.buffer[lastIndex] == ~0l) {
    // The literal became all ones: remove it ...
    this.buffer[lastIndex] = 0;
    this.actualsizeinwords = lastIndex;
    this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1);
    // ... and add one clean word instead
    addEmptyWord(true);
  }
  return true;
}
/**
 * Set the size in bits. This does not change the compressed bitmap.
 *
 * The new size must need the same number of words as the current size;
 * otherwise a RuntimeException is thrown.
 *
 * @since 0.4.0
 */
@Override
public void setSizeInBits(final int size) {
  final int bitsPerWord = EWAHCompressedBitmap.wordinbits;
  final int requestedWords = (size + bitsPerWord - 1) / bitsPerWord;
  final int currentWords = (this.sizeinbits + bitsPerWord - 1) / bitsPerWord;
  if (requestedWords != currentWords)
    throw new RuntimeException("You can only reduce the size of the bitmap within the scope of the last word. To extend the bitmap, please call setSizeInbits(int,boolean).");
  this.sizeinbits = size;
}
/**
 * Changes the reported size in bits of the *uncompressed* bitmap represented
 * by this compressed bitmap, possibly modifying the underlying compressed
 * words. The size can only grow; the added bits take the value given by
 * defaultvalue.
 *
 * @param size
 *          the size in bits
 * @param defaultvalue
 *          the value given to the newly added bits
 * @return true if the update was possible, false if size is smaller than the
 *         current size in bits
 */
public boolean setSizeInBits(final int size, final boolean defaultvalue) {
  if (size < this.sizeinbits) {
    return false;
  }
  if (defaultvalue) {
    // complete the current partial word bit by bit (could be optimized);
    // each set() call moves sizeinbits one past the bit it sets
    while ((this.sizeinbits < size) && ((this.sizeinbits % wordinbits) != 0)) {
      this.set(this.sizeinbits);
    }
    // then whole words of ones
    final int fullWords = (size / wordinbits) - this.sizeinbits / wordinbits;
    this.addStreamOfEmptyWords(defaultvalue, fullWords);
    // finally the bits of the last, partial word (could be optimized)
    while (this.sizeinbits < size) {
      this.set(this.sizeinbits);
    }
  } else {
    extendEmptyBits(this, this.sizeinbits, size);
  }
  this.sizeinbits = size;
  return true;
}
/**
 * Reports how many bits the *uncompressed* bitmap represented by this
 * compressed bitmap has. The value starts at zero and grows automatically
 * as bits are set to true.
 *
 * @return the size in bits
 */
@Override
public int sizeInBits() {
  final int bits = this.sizeinbits;
  return bits;
}
/**
 * Reports the *compressed* size of the bitmap: the number of words in use
 * times the number of bytes per word (equivalent to memory usage, after
 * accounting for some overhead).
 *
 * @return the size in bytes
 */
@Override
public int sizeInBytes() {
  final int bytesPerWord = wordinbits / 8;
  return bytesPerWord * this.actualsizeinwords;
}
/**
 * Builds a sorted array holding the positions of all the set bits.
 *
 * @return the array containing the location of the set bits
 */
public int[] toArray() {
  final int[] positions = new int[this.cardinality()];
  int written = 0;
  // bit position of the first bit of the word being decoded
  int base = 0;
  final EWAHIterator iter = new EWAHIterator(this, this.actualsizeinwords);
  while (iter.hasNext()) {
    final RunningLengthWord marker = iter.next();
    if (!marker.getRunningBit()) {
      // a run of zeros contributes nothing: just skip over it
      base += wordinbits * marker.getRunningLength();
    } else {
      // a run of ones: every single position in the run is set
      for (int w = 0; w < marker.getRunningLength(); ++w) {
        for (int c = 0; c < wordinbits; ++c) {
          positions[written++] = base++;
        }
      }
    }
    final int literals = marker.getNumberOfLiteralWords();
    for (int w = 0; w < literals; ++w) {
      long data = iter.buffer()[iter.literalWords() + w];
      if (usetrailingzeros) {
        // repeatedly peel off the lowest set bit
        while (data != 0) {
          final int ntz = Long.numberOfTrailingZeros(data);
          data ^= (1l << ntz);
          positions[written++] = ntz + base;
        }
      } else {
        // plain scan over the 64 bits of the word
        for (int c = 0; c < wordinbits; ++c) {
          if ((data & (1l << c)) != 0) {
            positions[written++] = c + base;
          }
        }
      }
      base += wordinbits;
    }
  }
  return positions;
}
/**
 * A more detailed string describing the bitmap (useful for debugging). The
 * text starts with the size in bits and in words; then, for every running
 * length word, it lists the run length with its bit value, the number of
 * literal ("dirty") words, and each literal word as a decimal long.
 *
 * @return the string
 */
public String toDebugString() {
  // a StringBuilder avoids re-copying the whole text on every appended line
  final StringBuilder ans = new StringBuilder();
  ans.append(" EWAHCompressedBitmap, size in bits = ").append(this.sizeinbits)
      .append(" size in words = ").append(this.actualsizeinwords).append("\n");
  final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords);
  while (i.hasNext()) {
    final RunningLengthWord localrlw = i.next();
    final String runTag = localrlw.getRunningBit() ? " 1x11\n" : " 0x00\n";
    ans.append(localrlw.getRunningLength()).append(runTag);
    final int literals = localrlw.getNumberOfLiteralWords();
    ans.append(literals).append(" dirties\n");
    for (int j = 0; j < literals; ++j) {
      final long data = i.buffer()[i.literalWords() + j];
      ans.append("\t").append(data).append("\n");
    }
  }
  return ans.toString();
}
/**
 * Describes the bitmap as the comma-separated list of its set bit
 * positions, enclosed in curly braces.
 *
 * @return the string
 */
@Override
public String toString() {
  final StringBuffer text = new StringBuffer();
  final IntIterator bits = this.intIterator();
  text.append("{");
  boolean first = true;
  while (bits.hasNext()) {
    if (!first) {
      text.append(",");
    }
    text.append(bits.next());
    first = false;
  }
  text.append("}");
  return text.toString();
}
/**
 * Exchanges the content of this bitmap with that of another bitmap: the
 * word buffer, the position of the current running length word, the number
 * of words in use and the size in bits. The two running length word objects
 * stay with their bitmap; only their positions are exchanged.
 *
 * @param other bitmap to swap with
 */
public void swap(final EWAHCompressedBitmap other) {
  final long[] otherBuffer = other.buffer;
  other.buffer = this.buffer;
  this.buffer = otherBuffer;

  final int otherPosition = other.rlw.position;
  other.rlw.position = this.rlw.position;
  this.rlw.position = otherPosition;

  final int otherWords = other.actualsizeinwords;
  other.actualsizeinwords = this.actualsizeinwords;
  this.actualsizeinwords = otherWords;

  final int otherBits = other.sizeinbits;
  other.sizeinbits = this.sizeinbits;
  this.sizeinbits = otherBits;
}
/**
 * Shrinks the internal buffer to its minimal allowable length, namely
 * this.actualsizeinwords words. This can free memory.
 */
public void trim() {
  final long[] compact = Arrays.copyOf(this.buffer, this.actualsizeinwords);
  this.buffer = compact;
}
/*
 * Externalizable hook: writing the object is delegated to serialize.
 *
 * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput)
 */
@Override
public void writeExternal(ObjectOutput out) throws IOException {
  this.serialize(out);
}
/**
 * Builds a new compressed bitmap holding the bitwise XOR of this bitmap
 * with another one.
 *
 * The running time is proportional to the sum of the compressed sizes (as
 * reported by sizeInBytes()).
 *
 * If you are not planning on adding to the resulting bitmap, you may call
 * the trim() method to reduce memory usage.
 *
 * @param a
 *          the other bitmap
 * @return the EWAH compressed bitmap
 */
@Override
public EWAHCompressedBitmap xor(final EWAHCompressedBitmap a) {
  final EWAHCompressedBitmap result = new EWAHCompressedBitmap();
  // reserve room for the combined number of words of both inputs
  final int expectedWords = this.actualsizeinwords + a.actualsizeinwords;
  result.reserve(expectedWords);
  this.xorToContainer(a, result);
  return result;
}
/**
 * Computes the bitwise XOR of this bitmap with another bitmap and writes
 * the result to the given container.
 *
 * The running time is proportional to the sum of the compressed sizes (as
 * reported by sizeInBytes()).
 *
 * @since 0.4.0
 * @param a
 *          the other bitmap
 * @param container
 *          where we store the result
 */
public void xorToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) {
  final IteratingBufferedRunningLengthWord other =
      new IteratingBufferedRunningLengthWord(a.getEWAHIterator());
  final IteratingBufferedRunningLengthWord own =
      new IteratingBufferedRunningLengthWord(getEWAHIterator());
  while ((other.size() > 0) && (own.size() > 0)) {
    // as long as either side starts with a run, the side with the longer
    // run (the predator) consumes the matching words of the other (the prey)
    while ((other.getRunningLength() > 0) || (own.getRunningLength() > 0)) {
      final IteratingBufferedRunningLengthWord prey;
      final IteratingBufferedRunningLengthWord predator;
      if (other.getRunningLength() < own.getRunningLength()) {
        prey = other;
        predator = own;
      } else {
        prey = own;
        predator = other;
      }
      final boolean runBit = predator.getRunningBit();
      final long runLength = predator.getRunningLength();
      // a run of zeros copies the prey, a run of ones copies its negation
      final long consumed = runBit
          ? prey.dischargeNegated(container, runLength)
          : prey.discharge(container, runLength);
      // whatever the prey could not supply is filled with the run's own bit
      container.addStreamOfEmptyWords(runBit, runLength - consumed);
      predator.discardFirstWords(runLength);
    }
    // both sides now start with literal words: combine them pairwise
    final int common = Math.min(other.getNumberOfLiteralWords(),
        own.getNumberOfLiteralWords());
    if (common > 0) {
      for (int k = 0; k < common; ++k) {
        container.add(other.getLiteralWordAt(k) ^ own.getLiteralWordAt(k));
      }
      other.discardFirstWords(common);
      own.discardFirstWords(common);
    }
  }
  // one side is exhausted: the rest of the other side goes out unchanged
  final IteratingBufferedRunningLengthWord remaining = (other.size() > 0) ? other : own;
  remaining.discharge(container);
  container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits()));
}
/**
 * Counts the bits set in the bitwise XOR of this bitmap with another
 * bitmap. The result is fed to a BitCounter, so no intermediate bitmap is
 * needed to hold the result of the XOR.
 *
 * @since 0.4.0
 * @param a
 *          the other bitmap
 * @return the cardinality
 */
public int xorCardinality(final EWAHCompressedBitmap a) {
  final BitCounter bitCounter = new BitCounter();
  this.xorToContainer(a, bitCounter);
  final int count = bitCounter.getCount();
  return count;
}
/**
 * For internal use. Computes the bitwise and of the provided bitmaps and
 * stores the result in the container.
 *
 * @param container
 *          where the result is stored
 * @param bitmaps
 *          bitmaps to AND (at least two)
 * @throws IllegalArgumentException
 *           if fewer than two bitmaps are provided
 * @since 0.4.3
 */
public static void andWithContainer(final BitmapStorage container,
    final EWAHCompressedBitmap... bitmaps) {
  // every path below reads bitmaps[0] and bitmaps[1], so zero or one bitmap
  // is rejected up front
  if (bitmaps.length < 2)
    throw new IllegalArgumentException("You should provide at least two bitmaps, provided " + bitmaps.length);
  if (bitmaps.length == 2) {
    bitmaps[0].andToContainer(bitmaps[1], container);
    return;
  }
  EWAHCompressedBitmap answer = new EWAHCompressedBitmap();
  EWAHCompressedBitmap tmp = new EWAHCompressedBitmap();
  bitmaps[0].andToContainer(bitmaps[1], answer);
  // fold in every bitmap but the last, ping-ponging between answer and tmp
  for (int k = 2; k < bitmaps.length - 1; ++k) {
    answer.andToContainer(bitmaps[k], tmp);
    tmp.swap(answer);
    tmp.clear();
  }
  // the last AND writes straight into the caller's container
  answer.andToContainer(bitmaps[bitmaps.length - 1], container);
}
/**
 * Builds a new compressed bitmap holding the bitwise AND of the provided
 * bitmaps.
 *
 * It may or may not be faster than doing the aggregation two-by-two
 * (A.and(B).and(C)).
 *
 * If only one bitmap is provided, it is returned as is.
 *
 * If you are not planning on adding to the resulting bitmap, you may call
 * the trim() method to reduce memory usage.
 *
 * @since 0.4.3
 * @param bitmaps
 *          bitmaps to AND together
 * @return result of the AND
 */
public static EWAHCompressedBitmap and(final EWAHCompressedBitmap... bitmaps) {
  switch (bitmaps.length) {
  case 1:
    return bitmaps[0];
  case 2:
    return bitmaps[0].and(bitmaps[1]);
  default:
    break;
  }
  EWAHCompressedBitmap accumulator = new EWAHCompressedBitmap();
  EWAHCompressedBitmap scratch = new EWAHCompressedBitmap();
  bitmaps[0].andToContainer(bitmaps[1], accumulator);
  int k = 2;
  while (k < bitmaps.length) {
    // compute into the scratch bitmap, then exchange contents so that the
    // accumulator holds the newest result and the scratch can be reused
    accumulator.andToContainer(bitmaps[k], scratch);
    scratch.swap(accumulator);
    scratch.clear();
    ++k;
  }
  return accumulator;
}
/**
 * Counts the bits set in the bitwise AND of the provided bitmaps. The
 * result is fed to a BitCounter, so no intermediate bitmap is needed to
 * hold the result of the AND. With a single bitmap, its own cardinality is
 * returned.
 *
 * @since 0.4.3
 * @param bitmaps
 *          bitmaps to AND
 * @return the cardinality
 */
public static int andCardinality(final EWAHCompressedBitmap... bitmaps) {
  if (bitmaps.length != 1) {
    final BitCounter bitCounter = new BitCounter();
    andWithContainer(bitCounter, bitmaps);
    return bitCounter.getCount();
  }
  return bitmaps[0].cardinality();
}
/**
 * Creates a bitmap whose bits are set to true at the given positions. The
 * positions should be given in sorted order, since each one is handed to
 * set() in turn.
 *
 * (This is a convenience method.)
 *
 * @since 0.4.5
 * @param setbits list of set bit positions
 * @return the bitmap
 */
public static EWAHCompressedBitmap bitmapOf(int ... setbits) {
  final EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap();
  for (int idx = 0; idx < setbits.length; ++idx) {
    bitmap.set(setbits[idx]);
  }
  return bitmap;
}
/**
 * For internal use. Pads the storage with words made of zeroes so that it
 * reaches the desired size.
 *
 * @param storage
 *          bitmap to extend
 * @param currentSize
 *          current size (in bits)
 * @param newSize
 *          new desired size (in bits)
 * @since 0.4.3
 */
private static void extendEmptyBits(final BitmapStorage storage,
    final int currentSize, final int newSize) {
  // difference in complete words ...
  int words = (newSize / wordinbits) - currentSize / wordinbits;
  // ... plus one word for a partial last word in the new size ...
  if (newSize % wordinbits != 0) {
    ++words;
  }
  // ... minus the partial last word the current size already occupies
  if (currentSize % wordinbits != 0) {
    --words;
  }
  storage.addStreamOfEmptyWords(false, words);
}
/**
 * Computes the logical OR with an adaptive choice of algorithm: when the
 * inputs' total compressed size in bits exceeds the largest uncompressed
 * size in bits, the buffered aggregation is used, otherwise the
 * priority-queue one. Mostly for internal use.
 *
 * @param container where the aggregate is written.
 * @param bitmaps to be aggregated
 * @throws IllegalArgumentException if fewer than two bitmaps are provided
 */
public static void orWithContainer(final BitmapStorage container,
    final EWAHCompressedBitmap... bitmaps) {
  if (bitmaps.length < 2) {
    throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length);
  }
  long totalBytes = 0L;
  long largestBits = 0L;
  for (int k = 0; k < bitmaps.length; ++k) {
    final EWAHCompressedBitmap b = bitmaps[k];
    totalBytes += b.sizeInBytes();
    largestBits = Math.max(largestBits, b.sizeInBits());
  }
  if (totalBytes * 8 <= largestBits) {
    FastAggregation.orToContainer(container, bitmaps);
  } else {
    FastAggregation.bufferedorWithContainer(container, 65536, bitmaps);
  }
}
/**
 * Computes the logical XOR with an adaptive choice of algorithm: when the
 * inputs' total compressed size in bits exceeds the largest uncompressed
 * size in bits, the buffered aggregation is used, otherwise the
 * priority-queue one. Mostly for internal use.
 *
 * @param container where the aggregate is written.
 * @param bitmaps to be aggregated
 * @throws IllegalArgumentException if fewer than two bitmaps are provided
 */
public static void xorWithContainer(final BitmapStorage container,
    final EWAHCompressedBitmap... bitmaps) {
  if (bitmaps.length < 2) {
    throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length);
  }
  long totalBytes = 0L;
  long largestBits = 0L;
  for (int k = 0; k < bitmaps.length; ++k) {
    final EWAHCompressedBitmap b = bitmaps[k];
    totalBytes += b.sizeInBytes();
    largestBits = Math.max(largestBits, b.sizeInBits());
  }
  if (totalBytes * 8 <= largestBits) {
    FastAggregation.xorToContainer(container, bitmaps);
  } else {
    FastAggregation.bufferedxorWithContainer(container, 65536, bitmaps);
  }
}
/**
 * Builds a new compressed bitmap holding the bitwise OR of the provided
 * bitmaps. This is typically faster than doing the aggregation two-by-two
 * (A.or(B).or(C).or(D)).
 *
 * If only one bitmap is provided, it is returned as is.
 *
 * If you are not planning on adding to the resulting bitmap, you may call
 * the trim() method to reduce memory usage.
 *
 * @since 0.4.0
 * @param bitmaps
 *          bitmaps to OR together
 * @return result of the OR
 */
public static EWAHCompressedBitmap or(final EWAHCompressedBitmap... bitmaps) {
  if (bitmaps.length == 1) {
    return bitmaps[0];
  }
  final EWAHCompressedBitmap result = new EWAHCompressedBitmap();
  int largestSize = 0;
  for (int k = 0; k < bitmaps.length; ++k) {
    if (bitmaps[k].actualsizeinwords > largestSize) {
      largestSize = bitmaps[k].actualsizeinwords;
    }
  }
  // reserve one and a half times the word count of the largest input
  result.reserve((int) (largestSize * 1.5));
  orWithContainer(result, bitmaps);
  return result;
}
/**
 * Builds a new compressed bitmap holding the bitwise XOR of the provided
 * bitmaps. This is typically faster than doing the aggregation two-by-two
 * (A.xor(B).xor(C).xor(D)).
 *
 * If only one bitmap is provided, it is returned as is.
 *
 * If you are not planning on adding to the resulting bitmap, you may call
 * the trim() method to reduce memory usage.
 *
 * @param bitmaps
 *          bitmaps to XOR together
 * @return result of the XOR
 */
public static EWAHCompressedBitmap xor(final EWAHCompressedBitmap... bitmaps) {
  if (bitmaps.length == 1) {
    return bitmaps[0];
  }
  final EWAHCompressedBitmap result = new EWAHCompressedBitmap();
  int largestSize = 0;
  for (int k = 0; k < bitmaps.length; ++k) {
    if (bitmaps[k].actualsizeinwords > largestSize) {
      largestSize = bitmaps[k].actualsizeinwords;
    }
  }
  // reserve one and a half times the word count of the largest input
  result.reserve((int) (largestSize * 1.5));
  xorWithContainer(result, bitmaps);
  return result;
}
/**
 * Counts the bits set in the bitwise OR of the provided bitmaps. The result
 * is fed to a BitCounter, so no intermediate bitmap is needed to hold the
 * result of the OR. With a single bitmap, its own cardinality is returned.
 *
 * @since 0.4.0
 * @param bitmaps
 *          bitmaps to OR
 * @return the cardinality
 */
public static int orCardinality(final EWAHCompressedBitmap... bitmaps) {
  if (bitmaps.length != 1) {
    final BitCounter bitCounter = new BitCounter();
    orWithContainer(bitCounter, bitmaps);
    return bitCounter.getCount();
  }
  return bitmaps[0].cardinality();
}
/**
 * The actual size in words: how many entries of {@link #buffer} are in use.
 * It starts at 1; sizeInBytes() is derived from it.
 */
int actualsizeinwords = 1;
/** The buffer (array of 64-bit words) holding the compressed bitmap. */
long buffer[] = null;
/**
 * The current (last) running length word. set() adjusts its running length
 * and its number of literal words when appending bits.
 */
RunningLengthWord rlw = null;
/**
 * sizeinbits: number of bits in the (uncompressed) bitmap. It is initially
 * zero and set() moves it to one past the bit it sets.
 */
int sizeinbits = 0;
/**
 * The Constant defaultbuffersize: default memory allocation when the object
 * is constructed.
 */
static final int defaultbuffersize = 4;
/**
 * Optimization option: when true, toArray() extracts set bits with
 * Long.numberOfTrailingZeros instead of testing all 64 bit positions.
 */
public static final boolean usetrailingzeros = true;
/** whether we adjust after some aggregation by adding in zeroes **/
public static final boolean adjustContainerSizeWhenAggregating = true;
/** The Constant wordinbits represents the number of bits in a long (64). */
public static final int wordinbits = 64;
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/EWAHIterator.java 0000664 0000000 0000000 00000004450 12240435670 0027057 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * The class EWAHIterator represents a special type of efficient iterator:
 * it walks over the running length words of a compressed bitmap, exposing
 * each of them through one shared RunningLengthWord object.
 * It is not meant for end users.
 *
 * @author Daniel Lemire
 * @since 0.1.0
 *
 */
public final class EWAHIterator implements Cloneable {
  /**
   * Index, in the array of words, of the running length word that the next
   * call to next() will expose. After a call to next() it therefore sits
   * just past the literal words of the word that was returned.
   */
  int pointer;

  /** The current running length word (one object, repositioned by next()). */
  RunningLengthWord rlw;

  /** The size in words: iteration stops once the pointer reaches it. */
  int size;

  /**
   * Instantiates a new EWAH iterator.
   *
   * @param a the bitmap whose words are iterated over
   * @param sizeinwords the number of words that are significant in the array of words
   */
  public EWAHIterator(final EWAHCompressedBitmap a, final int sizeinwords) {
    this.pointer = 0;
    this.size = sizeinwords;
    this.rlw = new RunningLengthWord(a, 0);
  }

  /**
   * Allow expert developers to instantiate an EWAHIterator.
   *
   * @param bitmap we want to iterate over
   * @return an iterator
   */
  public static EWAHIterator getEWAHIterator(EWAHCompressedBitmap bitmap) {
    final EWAHIterator iterator = bitmap.getEWAHIterator();
    return iterator;
  }

  /**
   * Access to the array of words of the bitmap being iterated over.
   *
   * @return the long[]
   */
  public long[] buffer() {
    final EWAHCompressedBitmap owner = this.rlw.parent;
    return owner.buffer;
  }

  /**
   * Position, in the array of words, of the first literal word attached to
   * the current running length word.
   *
   * @return the int
   */
  public int literalWords() {
    final int literalCount = this.rlw.getNumberOfLiteralWords();
    return this.pointer - literalCount;
  }

  /**
   * Checks whether another running length word is available.
   *
   * @return true, if successful
   */
  public boolean hasNext() {
    return this.size > this.pointer;
  }

  /**
   * Moves to the next running length word and returns it. The same
   * RunningLengthWord object is returned on every call.
   *
   * @return the running length word
   */
  public RunningLengthWord next() {
    this.rlw.position = this.pointer;
    // skip the marker itself plus the literal words that follow it
    final int consumed = 1 + this.rlw.getNumberOfLiteralWords();
    this.pointer += consumed;
    return this.rlw;
  }

  @Override
  public EWAHIterator clone() throws CloneNotSupportedException {
    final EWAHIterator copy = (EWAHIterator) super.clone();
    copy.pointer = this.pointer;
    copy.size = this.size;
    // the copy needs its own running length word, not a shared one
    copy.rlw = this.rlw.clone();
    return copy;
  }
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/FastAggregation.java 0000664 0000000 0000000 00000033376 12240435670 0027677 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
import java.util.Arrays;
import java.util.Comparator;
import java.util.PriorityQueue;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Fast algorithms to aggregate many bitmaps. These algorithms are just given as
 * reference. They may not be faster than the corresponding methods in the
 * EWAHCompressedBitmap class.
 *
 * @author Daniel Lemire
 *
 */
public class FastAggregation {
  /**
   * Compute the and aggregate using a temporary uncompressed bitmap.
   *
   * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap)
   * @param bitmaps the source bitmaps
   * @return the and aggregate.
   */
  public static EWAHCompressedBitmap bufferedand(final int bufsize,
      final EWAHCompressedBitmap... bitmaps) {
    EWAHCompressedBitmap answer = new EWAHCompressedBitmap();
    bufferedandWithContainer(answer, bufsize, bitmaps);
    return answer;
  }

  /**
   * Compute the and aggregate using a temporary uncompressed bitmap.
   *
   * @param container where the aggregate is written
   * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap)
   * @param bitmaps the source bitmaps
   */
  public static void bufferedandWithContainer(final BitmapStorage container, final int bufsize,
      final EWAHCompressedBitmap... bitmaps) {
    java.util.LinkedList<IteratingBufferedRunningLengthWord> al =
        new java.util.LinkedList<IteratingBufferedRunningLengthWord>();
    for (EWAHCompressedBitmap bitmap : bitmaps) {
      al.add(new IteratingBufferedRunningLengthWord(bitmap));
    }
    long[] hardbitmap = new long[bufsize * bitmaps.length];
    // as soon as one input is exhausted there is nothing left to AND
    for (IteratingRLW i : al)
      if (i.size() == 0) {
        al.clear();
        break;
      }
    while (!al.isEmpty()) {
      // all ones is the neutral element of AND
      Arrays.fill(hardbitmap, ~0l);
      // keep the smallest value returned by inplaceand over all inputs
      // (presumably the number of buffer words it processed -- TODO confirm)
      long effective = Integer.MAX_VALUE;
      for (IteratingRLW i : al) {
        int eff = IteratorAggregation.inplaceand(hardbitmap, i);
        if (eff < effective)
          effective = eff;
      }
      for (int k = 0; k < effective; ++k)
        container.add(hardbitmap[k]);
      for (IteratingRLW i : al)
        if (i.size() == 0) {
          al.clear();
          break;
        }
    }
    // NOTE(review): unlike the or/xor variants below, this method never calls
    // container.setSizeInBits -- confirm whether that is intended.
  }

  /**
   * Compute the or aggregate using a temporary uncompressed bitmap.
   *
   * @param bufsize buffer size used during the computation in 64-bit words
   * @param bitmaps the source bitmaps
   * @return the or aggregate.
   */
  public static EWAHCompressedBitmap bufferedor(final int bufsize,
      final EWAHCompressedBitmap... bitmaps) {
    EWAHCompressedBitmap answer = new EWAHCompressedBitmap();
    bufferedorWithContainer(answer, bufsize, bitmaps);
    return answer;
  }

  /**
   * Compute the or aggregate using a temporary uncompressed bitmap.
   *
   * @param container where the aggregate is written
   * @param bufsize buffer size used during the computation in 64-bit words
   * @param bitmaps the source bitmaps
   */
  public static void bufferedorWithContainer(final BitmapStorage container, final int bufsize,
      final EWAHCompressedBitmap... bitmaps) {
    int range = 0;
    // work on a copy sorted by decreasing size in bits; the caller's array
    // is left untouched
    EWAHCompressedBitmap[] sbitmaps = bitmaps.clone();
    Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap>() {
      @Override
      public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) {
        return b.sizeinbits - a.sizeinbits;
      }
    });
    java.util.ArrayList<IteratingBufferedRunningLengthWord> al =
        new java.util.ArrayList<IteratingBufferedRunningLengthWord>();
    for (EWAHCompressedBitmap bitmap : sbitmaps) {
      if (bitmap.sizeinbits > range)
        range = bitmap.sizeinbits;
      al.add(new IteratingBufferedRunningLengthWord(bitmap));
    }
    long[] hardbitmap = new long[bufsize];
    // only the first maxr iterators are still visited; it shrinks to the
    // index of an exhausted iterator when one is met
    int maxr = al.size();
    while (maxr > 0) {
      long effective = 0;
      for (int k = 0; k < maxr; ++k) {
        if (al.get(k).size() > 0) {
          int eff = IteratorAggregation.inplaceor(hardbitmap, al.get(k));
          if (eff > effective)
            effective = eff;
        } else
          maxr = k;
      }
      for (int k = 0; k < effective; ++k)
        container.add(hardbitmap[k]);
      // all zeros is the neutral element of OR
      Arrays.fill(hardbitmap, 0);
    }
    container.setSizeInBits(range);
  }

  /**
   * Compute the xor aggregate using a temporary uncompressed bitmap.
   *
   * @param bufsize buffer size used during the computation in 64-bit words
   * @param bitmaps the source bitmaps
   * @return the xor aggregate.
   */
  public static EWAHCompressedBitmap bufferedxor(final int bufsize,
      final EWAHCompressedBitmap... bitmaps) {
    EWAHCompressedBitmap answer = new EWAHCompressedBitmap();
    bufferedxorWithContainer(answer, bufsize, bitmaps);
    return answer;
  }

  /**
   * Compute the xor aggregate using a temporary uncompressed bitmap.
   *
   * @param container where the aggregate is written
   * @param bufsize buffer size used during the computation in 64-bit words
   * @param bitmaps the source bitmaps
   */
  public static void bufferedxorWithContainer(final BitmapStorage container, final int bufsize,
      final EWAHCompressedBitmap... bitmaps) {
    int range = 0;
    // work on a copy sorted by decreasing size in bits; the caller's array
    // is left untouched
    EWAHCompressedBitmap[] sbitmaps = bitmaps.clone();
    Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap>() {
      @Override
      public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) {
        return b.sizeinbits - a.sizeinbits;
      }
    });
    java.util.ArrayList<IteratingBufferedRunningLengthWord> al =
        new java.util.ArrayList<IteratingBufferedRunningLengthWord>();
    for (EWAHCompressedBitmap bitmap : sbitmaps) {
      if (bitmap.sizeinbits > range)
        range = bitmap.sizeinbits;
      al.add(new IteratingBufferedRunningLengthWord(bitmap));
    }
    long[] hardbitmap = new long[bufsize];
    // only the first maxr iterators are still visited; it shrinks to the
    // index of an exhausted iterator when one is met
    int maxr = al.size();
    while (maxr > 0) {
      long effective = 0;
      for (int k = 0; k < maxr; ++k) {
        if (al.get(k).size() > 0) {
          int eff = IteratorAggregation.inplacexor(hardbitmap, al.get(k));
          if (eff > effective)
            effective = eff;
        } else
          maxr = k;
      }
      for (int k = 0; k < effective; ++k)
        container.add(hardbitmap[k]);
      // all zeros is the neutral element of XOR
      Arrays.fill(hardbitmap, 0);
    }
    container.setSizeInBits(range);
  }

  /**
   * Uses a priority queue to compute the or aggregate: the two smallest
   * elements (by sizeInBytes()) are repeatedly replaced by their OR until a
   * single element remains.
   *
   * @param <T> a class extending LogicalElement (like a compressed bitmap)
   * @param bitmaps
   *          bitmaps to be aggregated
   * @return the or aggregate
   */
  @SuppressWarnings({ "rawtypes", "unchecked" })
  public static <T extends LogicalElement> T or(T... bitmaps) {
    PriorityQueue<T> pq = new PriorityQueue<T>(bitmaps.length,
        new Comparator<T>() {
          @Override
          public int compare(T a, T b) {
            return a.sizeInBytes() - b.sizeInBytes();
          }
        });
    for (T x : bitmaps) {
      pq.add(x);
    }
    while (pq.size() > 1) {
      T x1 = pq.poll();
      T x2 = pq.poll();
      pq.add((T) x1.or(x2));
    }
    return pq.poll();
  }

  /**
   * Uses a priority queue to compute the or aggregate. The two smallest
   * bitmaps are merged repeatedly; the OR of the last two is written
   * directly to the container.
   *
   * @param container where we write the result
   * @param bitmaps to be aggregated
   * @throws IllegalArgumentException if fewer than two bitmaps are provided
   */
  public static void orToContainer(final BitmapStorage container,
      final EWAHCompressedBitmap ... bitmaps) {
    if (bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps");
    PriorityQueue<EWAHCompressedBitmap> pq = new PriorityQueue<EWAHCompressedBitmap>(bitmaps.length,
        new Comparator<EWAHCompressedBitmap>() {
          @Override
          public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) {
            return a.sizeInBytes() - b.sizeInBytes();
          }
        });
    for (EWAHCompressedBitmap x : bitmaps) {
      pq.add(x);
    }
    while (pq.size() > 2) {
      EWAHCompressedBitmap x1 = pq.poll();
      EWAHCompressedBitmap x2 = pq.poll();
      pq.add(x1.or(x2));
    }
    pq.poll().orToContainer(pq.poll(), container);
  }

  /**
   * Uses a priority queue to compute the xor aggregate: the two smallest
   * elements (by sizeInBytes()) are repeatedly replaced by their XOR until a
   * single element remains.
   *
   * @param <T> a class extending LogicalElement (like a compressed bitmap)
   * @param bitmaps
   *          bitmaps to be aggregated
   * @return the xor aggregate
   */
  @SuppressWarnings({ "rawtypes", "unchecked" })
  public static <T extends LogicalElement> T xor(T... bitmaps) {
    PriorityQueue<T> pq = new PriorityQueue<T>(bitmaps.length,
        new Comparator<T>() {
          @Override
          public int compare(T a, T b) {
            return a.sizeInBytes() - b.sizeInBytes();
          }
        });
    for (T x : bitmaps)
      pq.add(x);
    while (pq.size() > 1) {
      T x1 = pq.poll();
      T x2 = pq.poll();
      pq.add((T) x1.xor(x2));
    }
    return pq.poll();
  }

  /**
   * Uses a priority queue to compute the xor aggregate. The two smallest
   * bitmaps are merged repeatedly; the XOR of the last two is written
   * directly to the container.
   *
   * @param container where we write the result
   * @param bitmaps to be aggregated
   * @throws IllegalArgumentException if fewer than two bitmaps are provided
   */
  public static void xorToContainer(final BitmapStorage container,
      final EWAHCompressedBitmap ... bitmaps) {
    if (bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps");
    PriorityQueue<EWAHCompressedBitmap> pq = new PriorityQueue<EWAHCompressedBitmap>(bitmaps.length,
        new Comparator<EWAHCompressedBitmap>() {
          @Override
          public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) {
            return a.sizeInBytes() - b.sizeInBytes();
          }
        });
    for (EWAHCompressedBitmap x : bitmaps) {
      pq.add(x);
    }
    while (pq.size() > 2) {
      EWAHCompressedBitmap x1 = pq.poll();
      EWAHCompressedBitmap x2 = pq.poll();
      pq.add(x1.xor(x2));
    }
    pq.poll().xorToContainer(pq.poll(), container);
  }

  /**
   * For internal use. Computes the bitwise or of the provided bitmaps and
   * stores the result in the container. (This used to be the default.)
   *
   * @deprecated use EWAHCompressedBitmap.or instead
   * @since 0.4.0
   * @param container where store the result
   * @param bitmaps to be aggregated
   */
  @Deprecated
  public static void legacy_orWithContainer(final BitmapStorage container,
      final EWAHCompressedBitmap... bitmaps) {
    if (bitmaps.length == 2) {
      // should be more efficient
      bitmaps[0].orToContainer(bitmaps[1], container);
      return;
    }
    // Sort the bitmaps in descending order by sizeinbits. We will exhaust the
    // sorted bitmaps from right to left.
    final EWAHCompressedBitmap[] sortedBitmaps = bitmaps.clone();
    Arrays.sort(sortedBitmaps, new Comparator<EWAHCompressedBitmap>() {
      @Override
      public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) {
        return a.sizeinbits < b.sizeinbits ? 1
            : a.sizeinbits == b.sizeinbits ? 0 : -1;
      }
    });
    final IteratingBufferedRunningLengthWord[] rlws = new IteratingBufferedRunningLengthWord[bitmaps.length];
    int maxAvailablePos = 0;
    for (EWAHCompressedBitmap bitmap : sortedBitmaps) {
      EWAHIterator iterator = bitmap.getEWAHIterator();
      if (iterator.hasNext()) {
        rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord(
            iterator);
      }
    }
    if (maxAvailablePos == 0) { // this never happens...
      container.setSizeInBits(0);
      return;
    }
    int maxSize = sortedBitmaps[0].sizeinbits;
    while (true) {
      // survey the still-active inputs: longest run of ones, shortest run of
      // zeros, smallest remaining size, and how many have no run at all
      long maxOneRl = 0;
      long minZeroRl = Long.MAX_VALUE;
      long minSize = Long.MAX_VALUE;
      int numEmptyRl = 0;
      for (int i = 0; i < maxAvailablePos; i++) {
        IteratingBufferedRunningLengthWord rlw = rlws[i];
        long size = rlw.size();
        if (size == 0) {
          // this input is exhausted: stop visiting it and those after it
          maxAvailablePos = i;
          break;
        }
        minSize = Math.min(minSize, size);
        if (rlw.getRunningBit()) {
          long rl = rlw.getRunningLength();
          maxOneRl = Math.max(maxOneRl, rl);
          minZeroRl = 0;
          if (rl == 0 && size > 0) {
            numEmptyRl++;
          }
        } else {
          long rl = rlw.getRunningLength();
          minZeroRl = Math.min(minZeroRl, rl);
          if (rl == 0 && size > 0) {
            numEmptyRl++;
          }
        }
      }
      if (maxAvailablePos == 0) {
        break;
      } else if (maxAvailablePos == 1) {
        // only one bitmap is left so just write the rest of it out
        rlws[0].discharge(container);
        break;
      }
      if (maxOneRl > 0) {
        // a run of ones dominates the OR over its whole length
        container.addStreamOfEmptyWords(true, maxOneRl);
        for (int i = 0; i < maxAvailablePos; i++) {
          IteratingBufferedRunningLengthWord rlw = rlws[i];
          rlw.discardFirstWords(maxOneRl);
        }
      } else if (minZeroRl > 0) {
        // every input starts with at least minZeroRl words of zeros
        container.addStreamOfEmptyWords(false, minZeroRl);
        for (int i = 0; i < maxAvailablePos; i++) {
          IteratingBufferedRunningLengthWord rlw = rlws[i];
          rlw.discardFirstWords(minZeroRl);
        }
      } else {
        int index = 0;
        if (numEmptyRl == 1) {
          // if one rlw has literal words to process and the rest have a run of
          // 0's we can write them out here
          IteratingBufferedRunningLengthWord emptyRl = null;
          long minNonEmptyRl = Long.MAX_VALUE;
          for (int i = 0; i < maxAvailablePos; i++) {
            IteratingBufferedRunningLengthWord rlw = rlws[i];
            long rl = rlw.getRunningLength();
            if (rl == 0) {
              assert emptyRl == null;
              emptyRl = rlw;
            } else {
              minNonEmptyRl = Math.min(minNonEmptyRl, rl);
            }
          }
          long wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl;
          if (emptyRl != null)
            emptyRl.writeLiteralWords((int) wordsToWrite, container);
          index += wordsToWrite;
        }
        // OR the remaining words one at a time; an input still inside its run
        // of zeros at this index contributes nothing
        while (index < minSize) {
          long word = 0;
          for (int i = 0; i < maxAvailablePos; i++) {
            IteratingBufferedRunningLengthWord rlw = rlws[i];
            if (rlw.getRunningLength() <= index) {
              word |= rlw.getLiteralWordAt(index - (int) rlw.getRunningLength());
            }
          }
          container.add(word);
          index++;
        }
        for (int i = 0; i < maxAvailablePos; i++) {
          IteratingBufferedRunningLengthWord rlw = rlws[i];
          rlw.discardFirstWords(minSize);
        }
      }
    }
    container.setSizeInBits(maxSize);
  }
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IntIterator.java 0000664 0000000 0000000 00000001127 12240435670 0027063 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 *
 * The IntIterator interface describes a one-way traversal over a stream of
 * primitive integers.
 *
 * @author Daniel Lemire
 * @since 0.2.0
 *
 */
public interface IntIterator {
  /**
   * Tells whether at least one more integer is available.
   *
   * @return true, if there is more, false otherwise
   */
  boolean hasNext();

  /**
   * Returns the next integer of the stream.
   *
   * @return the integer
   */
  int next();
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorImpl.java 0000664 0000000 0000000 00000004336 12240435670 0027712 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
/*
* Copyright 2012, Google Inc.
* Licensed under the Apache License, Version 2.0.
*/
import static com.googlecode.javaewah.EWAHCompressedBitmap.wordinbits;
/**
 * The IntIteratorImpl is the 64 bit implementation of the
 * IntIterator interface, which efficiently returns the stream of integers
 * represented by an EWAHIterator.
 *
 * @author Colby Ranger
 * @since 0.5.6
 */
final class IntIteratorImpl implements IntIterator {
  private final EWAHIterator ewahIter;
  private final long[] ewahBuffer;
  /** Next bit position to report while inside a run of ones. */
  private int position;
  /** Bit position at which the current run ends (exclusive). */
  private int runningLength;
  /** Bits of the current literal word that were not reported yet. */
  private long word;
  /** Index in the buffer of the next literal word to load. */
  private int wordPosition;
  /** Index in the buffer just past the last literal word of the current marker. */
  private int wordLength;
  /** Bit position of the first bit of the current literal word. */
  private int literalPosition;
  private boolean hasnext;

  IntIteratorImpl(EWAHIterator ewahIter) {
    this.ewahIter = ewahIter;
    this.ewahBuffer = ewahIter.buffer();
    this.hasnext = this.moveToNext();
  }

  /**
   * Advances over the running length words until a set bit is available.
   *
   * @return true if a set bit is available, false once the input is used up
   */
  public final boolean moveToNext() {
    while (true) {
      // literalHasNext() loads literal words as a side effect, so it must
      // only run when the current run has nothing left
      if (runningHasNext() || literalHasNext()) {
        return true;
      }
      if (!this.ewahIter.hasNext()) {
        return false;
      }
      setRunningLengthWord(this.ewahIter.next());
    }
  }

  @Override
  public boolean hasNext() {
    return this.hasnext;
  }

  /**
   * Returns the position of the next set bit; callers are expected to check
   * hasNext() first.
   */
  @Override
  public final int next() {
    final int answer;
    if (!runningHasNext()) {
      // take the lowest set bit out of the current literal word
      final int bit = Long.numberOfTrailingZeros(this.word);
      this.word ^= (1l << bit);
      answer = this.literalPosition + bit;
    } else {
      answer = this.position++;
    }
    this.hasnext = this.moveToNext();
    return answer;
  }

  /** Starts decoding a new running length word. */
  private void setRunningLengthWord(RunningLengthWord rlw) {
    final int runBits = wordinbits * (int) rlw.getRunningLength();
    this.runningLength = runBits + this.position;
    if (!rlw.getRunningBit()) {
      // a run of zeros holds no set bit: jump straight to its end
      this.position = this.runningLength;
    }
    this.wordPosition = this.ewahIter.literalWords();
    this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords();
  }

  /** True while the current run still has positions to report. */
  private boolean runningHasNext() {
    return this.runningLength > this.position;
  }

  /**
   * Loads literal words until one with a set bit is found or the literal
   * words of the current marker are used up.
   */
  private boolean literalHasNext() {
    while (this.wordPosition < this.wordLength && this.word == 0) {
      this.word = this.ewahBuffer[this.wordPosition++];
      this.literalPosition = this.position;
      this.position += wordinbits;
    }
    return this.word != 0;
  }
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IntIteratorOverIteratingRLW.java 0000664 0000000 0000000 00000004442 12240435670 0032156 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
import static com.googlecode.javaewah.EWAHCompressedBitmap.wordinbits;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* Implementation of an IntIterator over an IteratingRLW.
*
*
*/
public class IntIteratorOverIteratingRLW implements IntIterator {

    /** Iterator whose set bits are enumerated; it is advanced as bits are consumed. */
    IteratingRLW parent;
    /** Bit position of the next candidate bit. */
    private int position;
    /** Bit position at which the current run ends (exclusive). */
    private int runningLength;
    /** Remaining (not yet reported) bits of the literal word being scanned. */
    private long word;
    /** Index (relative to the current running length word) of the next literal word to load. */
    private int wordPosition;
    /** Number of literal words attached to the current running length word. */
    private int wordLength;
    /** Bit position corresponding to bit 0 of {@link #word}. */
    private int literalPosition;
    /** Cached answer for {@link #hasNext()}. */
    private boolean hasnext;

    /**
     * @param p iterator we wish to iterate over
     */
    public IntIteratorOverIteratingRLW(final IteratingRLW p) {
        this.parent = p;
        this.position = 0;
        setupForCurrentRunningLengthWord();
        this.hasnext = moveToNext();
    }

    /**
     * @return whether we could find another set bit; don't move if there is an unprocessed value
     */
    private boolean moveToNext() {
        for (;;) {
            // Order matters: literalHasNext() loads words and must only run
            // when the current run has nothing left to report.
            if (runningHasNext() || literalHasNext()) {
                return true;
            }
            if (!this.parent.next()) {
                return false;
            }
            setupForCurrentRunningLengthWord();
        }
    }

    @Override
    public boolean hasNext() {
        return this.hasnext;
    }

    @Override
    public final int next() {
        final int answer = runningHasNext() ? this.position++ : popLowestLiteralBit();
        this.hasnext = this.moveToNext();
        return answer;
    }

    /**
     * Removes the lowest set bit from the current literal word and returns the
     * bit position it stands for.
     */
    private int popLowestLiteralBit() {
        final int bit = Long.numberOfTrailingZeros(this.word);
        this.word ^= (1L << bit);
        return this.literalPosition + bit;
    }

    /**
     * Reads the parent's current running length word: computes where its run
     * ends, skips the run entirely when its bit is false, and records how many
     * literal words follow it.
     */
    private void setupForCurrentRunningLengthWord() {
        final int runEnd = wordinbits * (int) this.parent.getRunningLength() + this.position;
        this.runningLength = runEnd;
        if (!this.parent.getRunningBit()) {
            this.position = runEnd;
        }
        this.wordPosition = 0;
        this.wordLength = this.parent.getNumberOfLiteralWords();
    }

    /** @return true while the current run still has positions to report */
    private boolean runningHasNext() {
        return this.position < this.runningLength;
    }

    /**
     * Loads literal words from the parent until one with a set bit is found or
     * the literal words of the current running length word are used up. Each
     * loaded word advances the bit position by {@code wordinbits}.
     *
     * @return true if the current literal word has a set bit left
     */
    private boolean literalHasNext() {
        while (this.word == 0) {
            if (this.wordPosition >= this.wordLength) {
                return false;
            }
            this.word = this.parent.getLiteralWordAt(this.wordPosition++);
            this.literalPosition = this.position;
            this.position += wordinbits;
        }
        return true;
    }
}
IteratingBufferedRunningLengthWord.java 0000664 0000000 0000000 00000020051 12240435670 0033465 0 ustar 00root root 0000000 0000000 javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah package com.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* Mostly for internal use. Similar to BufferedRunningLengthWord, but automatically
* advances to the next BufferedRunningLengthWord as words are discarded.
*
* @since 0.4.0
* @author David McIntosh
*/
public final class IteratingBufferedRunningLengthWord implements IteratingRLW, Cloneable {

    /** Buffered copy of the running length word currently being consumed. */
    private BufferedRunningLengthWord brlw;
    /** Word array obtained from the iterator's buffer(); literal words are read from it. */
    private long[] buffer;
    /** Index in {@link #buffer} of the first literal word that has not been discarded. */
    private int literalWordStartPosition;
    /** Source of running length words. */
    private EWAHIterator iterator;

    /**
     * Instantiates a new iterating buffered running length word.
     * The first running length word is fetched from the iterator immediately.
     *
     * @param iterator iterator
     */
    public IteratingBufferedRunningLengthWord(final EWAHIterator iterator) {
        this.iterator = iterator;
        this.brlw = new BufferedRunningLengthWord(this.iterator.next());
        this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset;
        this.buffer = this.iterator.buffer();
    }

    /**
     * Instantiates a new iterating buffered running length word.
     *
     * @param bitmap over which we want to iterate
     */
    public IteratingBufferedRunningLengthWord(final EWAHCompressedBitmap bitmap) {
        this(EWAHIterator.getEWAHIterator(bitmap));
    }

    /**
     * Discard first words, iterating to the next running length word if needed.
     *
     * @param x the number of words to be discarded
     */
    @Override
    public void discardFirstWords(long x) {
        long remaining = x;
        while (remaining > 0) {
            // The run alone covers the request: shorten it and stop.
            if (this.brlw.RunningLength > remaining) {
                this.brlw.RunningLength -= remaining;
                return;
            }
            remaining -= this.brlw.RunningLength;
            this.brlw.RunningLength = 0;

            // Then eat into the literal words of the current marker.
            final long droppedLiterals = Math.min(remaining, this.brlw.NumberOfLiteralWords);
            this.literalWordStartPosition += droppedLiterals;
            this.brlw.NumberOfLiteralWords -= droppedLiterals;
            remaining -= droppedLiterals;

            // Request satisfied and the current marker still has content.
            if (remaining <= 0 && this.brlw.size() != 0) {
                continue;
            }
            if (!this.iterator.hasNext()) {
                return;
            }
            this.brlw.reset(this.iterator.next());
            // A freshly reset marker has a zero literal word offset.
            this.literalWordStartPosition = this.iterator.literalWords();
        }
    }

    /**
     * Move to the next RunningLengthWord. When there is none, the current
     * word is emptied (zero run, zero literal words).
     *
     * @return whether the move was possible
     */
    @Override
    public boolean next() {
        if (this.iterator.hasNext()) {
            this.brlw.reset(this.iterator.next());
            // A freshly reset marker has a zero literal word offset.
            this.literalWordStartPosition = this.iterator.literalWords();
            return true;
        }
        this.brlw.NumberOfLiteralWords = 0;
        this.brlw.RunningLength = 0;
        return false;
    }

    /**
     * Write out up to max words, returns how many were written
     *
     * @param container target for writes
     * @param max maximal number of writes
     * @return how many written
     */
    public long discharge(BitmapStorage container, long max) {
        return dischargeBounded(container, max, false);
    }

    /**
     * Write out up to max words (negated), returns how many were written
     *
     * @param container target for writes
     * @param max maximal number of writes
     * @return how many written
     */
    public long dischargeNegated(BitmapStorage container, long max) {
        return dischargeBounded(container, max, true);
    }

    /**
     * Shared body of the two bounded discharge methods: writes the run, then
     * the literal words, of each running length word until {@code max} words
     * were written or nothing is left, discarding what was written.
     *
     * @param container target for writes
     * @param max maximal number of words to write
     * @param negated whether run bits and literal words are written negated
     * @return how many words were written
     */
    private long dischargeBounded(final BitmapStorage container, final long max, final boolean negated) {
        long written = 0;
        while ((written < max) && (size() > 0)) {
            long runWords = getRunningLength();
            if (written + runWords > max) {
                runWords = max - written;
            }
            // negated != bit flips the bit exactly when negated is true
            container.addStreamOfEmptyWords(negated != getRunningBit(), runWords);
            written += runWords;

            int literalWords = getNumberOfLiteralWords();
            if (literalWords + written > max) {
                literalWords = (int) (max - written);
            }
            if (negated) {
                writeNegatedLiteralWords(literalWords, container);
            } else {
                writeLiteralWords(literalWords, container);
            }
            discardFirstWords(runWords + literalWords);
            written += literalWords;
        }
        return written;
    }

    /**
     * Write out the remaining words, transforming them to zeroes.
     *
     * @param container target for writes
     */
    public void dischargeAsEmpty(BitmapStorage container) {
        for (long pending = size(); pending > 0; pending = size()) {
            container.addStreamOfEmptyWords(false, pending);
            discardFirstWords(pending);
        }
    }

    /**
     * Write out the remaining words
     *
     * @param container target for writes
     */
    public void discharge(BitmapStorage container) {
        // Record how far into the current marker's literal words we already are,
        // so the static helper starts copying from the right place.
        this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords();
        discharge(this.brlw, this.iterator, container);
    }

    /**
     * Get the nth literal word for the current running length word
     *
     * @param index zero based index
     * @return the literal word
     */
    @Override
    public long getLiteralWordAt(int index) {
        return this.buffer[this.literalWordStartPosition + index];
    }

    /**
     * Gets the number of literal words for the current running length word.
     *
     * @return the number of literal words
     */
    @Override
    public int getNumberOfLiteralWords() {
        return this.brlw.NumberOfLiteralWords;
    }

    /**
     * Gets the running bit.
     *
     * @return the running bit
     */
    @Override
    public boolean getRunningBit() {
        return this.brlw.RunningBit;
    }

    /**
     * Gets the running length.
     *
     * @return the running length
     */
    @Override
    public long getRunningLength() {
        return this.brlw.RunningLength;
    }

    /**
     * Size in uncompressed words of the current running length word.
     *
     * @return the size, as reported by the buffered running length word
     */
    @Override
    public long size() {
        return this.brlw.size();
    }

    /**
     * write the first N literal words to the target bitmap. Does not discard the words or perform iteration.
     *
     * @param numWords number of words to be written
     * @param container where we write
     */
    public void writeLiteralWords(int numWords, BitmapStorage container) {
        container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords);
    }

    /**
     * write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration.
     *
     * @param numWords number of words to be written
     * @param container where we write
     */
    public void writeNegatedLiteralWords(int numWords, BitmapStorage container) {
        container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords);
    }

    /**
     * For internal use. (One could use the non-static discharge method instead,
     * but we expect them to be slower.)
     *
     * Writes the given word, then every word the iterator still yields, to the
     * container.
     *
     * @param initialWord
     *            the initial word
     * @param iterator
     *            the iterator
     * @param container
     *            the container
     */
    private static void discharge(final BufferedRunningLengthWord initialWord,
            final EWAHIterator iterator, final BitmapStorage container) {
        BufferedRunningLengthWord current = initialWord;
        boolean more;
        do {
            final long runningLength = current.getRunningLength();
            container.addStreamOfEmptyWords(current.getRunningBit(), runningLength);
            final int firstLiteral = iterator.literalWords() + current.literalwordoffset;
            container.addStreamOfLiteralWords(iterator.buffer(), firstLiteral,
                    current.getNumberOfLiteralWords());
            more = iterator.hasNext();
            if (more) {
                current = new BufferedRunningLengthWord(iterator.next());
            }
        } while (more);
    }

    @Override
    public IteratingBufferedRunningLengthWord clone() throws CloneNotSupportedException {
        // super.clone() already copies the buffer reference and the literal
        // start position; only the stateful members need their own copies.
        final IteratingBufferedRunningLengthWord copy =
                (IteratingBufferedRunningLengthWord) super.clone();
        copy.brlw = this.brlw.clone();
        copy.iterator = this.iterator.clone();
        return copy;
    }
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IteratingRLW.java 0000664 0000000 0000000 00000002265 12240435670 0027136 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* High-level iterator over a compressed bitmap.
*
*/
public interface IteratingRLW {

    /**
     * Moves to the next running length word.
     *
     * @return whether there is more
     */
    boolean next();

    /**
     * Reads one of the literal words attached to the current running length word.
     *
     * @param index where the literal word is
     * @return the literal word at the given index.
     */
    long getLiteralWordAt(int index);

    /**
     * @return the number of literal (non-fill) words
     */
    int getNumberOfLiteralWords();

    /**
     * @return the bit used for the fill bits
     */
    boolean getRunningBit();

    /**
     * @return sum of getRunningLength() and getNumberOfLiteralWords()
     */
    long size();

    /**
     * @return length of the run of fill words
     */
    long getRunningLength();

    /**
     * Drops words from the front of the stream.
     *
     * @param x the number of words to discard
     */
    void discardFirstWords(long x);

    /**
     * @return a copy of the iterator
     * @throws CloneNotSupportedException this should not be thrown in theory
     */
    IteratingRLW clone() throws CloneNotSupportedException;
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IteratorAggregation.java 0000664 0000000 0000000 00000043354 12240435670 0030570 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* Set of helper functions to aggregate bitmaps.
*
*/
public class IteratorAggregation {

    /**
     * Wraps an iterator so that it reports the bitwise negation of the wrapped
     * one: literal words are complemented and the running bit is flipped.
     * Every call is delegated to {@code x}, so advancing or discarding words on
     * the returned view also advances {@code x}. The view cannot be cloned.
     *
     * @param x iterator to negate
     * @return negated version of the iterator
     */
    public static IteratingRLW not(final IteratingRLW x) {
        return new IteratingRLW() {
            @Override
            public boolean next() {
                return x.next();
            }

            @Override
            public long getLiteralWordAt(int index) {
                return ~x.getLiteralWordAt(index);
            }

            @Override
            public int getNumberOfLiteralWords() {
                return x.getNumberOfLiteralWords();
            }

            @Override
            public boolean getRunningBit() {
                return !x.getRunningBit();
            }

            @Override
            public long size() {
                return x.size();
            }

            @Override
            public long getRunningLength() {
                return x.getRunningLength();
            }

            @Override
            public void discardFirstWords(long y) {
                x.discardFirstWords(y);
            }

            @Override
            public IteratingRLW clone() throws CloneNotSupportedException {
                throw new CloneNotSupportedException();
            }
        };
    }

    /**
     * Aggregate the iterators using a bitmap buffer.
     *
     * @param al set of iterators to aggregate
     * @return and aggregate
     */
    public static IteratingRLW bufferedand(final IteratingRLW... al) {
        return bufferedand(DEFAULTMAXBUFSIZE, al);
    }

    /**
     * Aggregate the iterators using a bitmap buffer.
     *
     * @param bufsize size of the internal buffer used by the iterator in 64-bit words (per input iterator)
     * @param al set of iterators to aggregate
     * @return and aggregate; the single input itself when only one is given
     * @throws IllegalArgumentException if no iterator is given
     */
    public static IteratingRLW bufferedand(final int bufsize, final IteratingRLW... al) {
        if (al.length == 0)
            throw new IllegalArgumentException("Need at least one iterator");
        if (al.length == 1)
            return al[0];
        return new BufferedIterator(new BufferedAndIterator(toLinkedList(al), bufsize));
    }

    /**
     * Aggregate the iterators using a bitmap buffer.
     *
     * @param al set of iterators to aggregate
     * @return or aggregate
     */
    public static IteratingRLW bufferedor(final IteratingRLW... al) {
        return bufferedor(DEFAULTMAXBUFSIZE, al);
    }

    /**
     * Aggregate the iterators using a bitmap buffer.
     *
     * @param bufsize size of the internal buffer used by the iterator in 64-bit words
     * @param al iterators to aggregate
     * @return or aggregate; the single input itself when only one is given
     * @throws IllegalArgumentException if no iterator is given
     */
    public static IteratingRLW bufferedor(final int bufsize, final IteratingRLW... al) {
        if (al.length == 0)
            throw new IllegalArgumentException("Need at least one iterator");
        if (al.length == 1)
            return al[0];
        return new BufferedIterator(new BufferedORIterator(toLinkedList(al), bufsize));
    }

    /**
     * Aggregate the iterators using a bitmap buffer.
     *
     * @param al set of iterators to aggregate
     * @return xor aggregate
     */
    public static IteratingRLW bufferedxor(final IteratingRLW... al) {
        return bufferedxor(DEFAULTMAXBUFSIZE, al);
    }

    /**
     * Aggregate the iterators using a bitmap buffer.
     *
     * @param bufsize size of the internal buffer used by the iterator in 64-bit words
     * @param al iterators to aggregate
     * @return xor aggregate; the single input itself when only one is given
     * @throws IllegalArgumentException if no iterator is given
     */
    public static IteratingRLW bufferedxor(final int bufsize, final IteratingRLW... al) {
        if (al.length == 0)
            throw new IllegalArgumentException("Need at least one iterator");
        if (al.length == 1)
            return al[0];
        return new BufferedIterator(new BufferedXORIterator(toLinkedList(al), bufsize));
    }

    /**
     * Copies the iterators, in order, into a new mutable linked list.
     */
    private static LinkedList<IteratingRLW> toLinkedList(final IteratingRLW[] al) {
        return new LinkedList<IteratingRLW>(Arrays.asList(al));
    }

    /**
     * Write out the content of the iterator, but as if it were all zeros.
     *
     * @param container
     *            where we write
     * @param i
     *            the iterator
     */
    protected static void dischargeAsEmpty(final BitmapStorage container,
            final IteratingRLW i) {
        while (i.size() > 0) {
            container.addStreamOfEmptyWords(false, i.size());
            i.next();
        }
    }

    /**
     * Write out up to max words, returns how many were written
     *
     * @param container target for writes
     * @param i source of data; the written words are discarded from it
     * @param max maximal number of writes
     * @return how many written
     */
    protected static long discharge(final BitmapStorage container, IteratingRLW i, long max) {
        long counter = 0;
        while (i.size() > 0 && counter < max) {
            long L1 = i.getRunningLength();
            if (L1 > 0) {
                if (L1 + counter > max)
                    L1 = max - counter;
                container.addStreamOfEmptyWords(i.getRunningBit(), L1);
                counter += L1;
            }
            long L = i.getNumberOfLiteralWords();
            if (L + counter > max)
                L = max - counter;
            for (int k = 0; k < L; ++k) {
                container.add(i.getLiteralWordAt(k));
            }
            counter += L;
            i.discardFirstWords(L + L1);
        }
        return counter;
    }

    /**
     * Write out up to max negated words, returns how many were written
     *
     * @param container target for writes
     * @param i source of data; the written words are discarded from it
     * @param max maximal number of writes
     * @return how many written
     */
    protected static long dischargeNegated(final BitmapStorage container, IteratingRLW i, long max) {
        long counter = 0;
        while (i.size() > 0 && counter < max) {
            long L1 = i.getRunningLength();
            if (L1 > 0) {
                if (L1 + counter > max)
                    L1 = max - counter;
                container.addStreamOfEmptyWords(!i.getRunningBit(), L1);
                counter += L1;
            }
            long L = i.getNumberOfLiteralWords();
            if (L + counter > max)
                L = max - counter;
            for (int k = 0; k < L; ++k) {
                container.add(~i.getLiteralWordAt(k));
            }
            counter += L;
            i.discardFirstWords(L + L1);
        }
        return counter;
    }

    /**
     * Compute the first few words of the AND aggregate between two iterators.
     * Both iterators are consumed as words are written. The loop stops when
     * either iterator is empty or the budget is used up; the budget is
     * decreased by one per round and by the number of literal words written,
     * so it is only an approximate bound.
     *
     * @param container where to write
     * @param desiredrlwcount budget controlling how much is written
     * @param rlwi first iterator to aggregate
     * @param rlwj second iterator to aggregate
     */
    static void andToContainer(final BitmapStorage container,
            int desiredrlwcount, final IteratingRLW rlwi, IteratingRLW rlwj) {
        while ((rlwi.size() > 0) && (rlwj.size() > 0) && (desiredrlwcount-- > 0)) {
            while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
                // The predator is the side with the longer (or equal) run; it
                // decides what happens to the prey's words over that span.
                final boolean i_is_prey = rlwi.getRunningLength() < rlwj
                        .getRunningLength();
                final IteratingRLW prey = i_is_prey ? rlwi : rlwj;
                final IteratingRLW predator = i_is_prey ? rlwj : rlwi;
                if (predator.getRunningBit() == false) {
                    // Run of zeros: the AND is zero whatever the prey holds.
                    container.addStreamOfEmptyWords(false, predator.getRunningLength());
                    prey.discardFirstWords(predator.getRunningLength());
                    predator.discardFirstWords(predator.getRunningLength());
                } else {
                    // Run of ones: the AND is the prey itself, padded with
                    // zeros if the prey runs out first.
                    final long index = discharge(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(false, predator.getRunningLength()
                            - index);
                    predator.discardFirstWords(predator.getRunningLength());
                }
            }
            final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
                    rlwj.getNumberOfLiteralWords());
            if (nbre_literal > 0) {
                desiredrlwcount -= nbre_literal;
                for (int k = 0; k < nbre_literal; ++k)
                    container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
                rlwi.discardFirstWords(nbre_literal);
                rlwj.discardFirstWords(nbre_literal);
            }
        }
    }

    /**
     * Compute the AND aggregate between two iterators until either of them is
     * empty. Both iterators are consumed as words are written.
     *
     * @param container where to write
     * @param rlwi first iterator to aggregate
     * @param rlwj second iterator to aggregate
     */
    static void andToContainer(final BitmapStorage container,
            final IteratingRLW rlwi, IteratingRLW rlwj) {
        while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
            while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
                // The predator is the side with the longer (or equal) run.
                final boolean i_is_prey = rlwi.getRunningLength() < rlwj
                        .getRunningLength();
                final IteratingRLW prey = i_is_prey ? rlwi : rlwj;
                final IteratingRLW predator = i_is_prey ? rlwj : rlwi;
                if (predator.getRunningBit() == false) {
                    // Run of zeros: the AND is zero whatever the prey holds.
                    container.addStreamOfEmptyWords(false, predator.getRunningLength());
                    prey.discardFirstWords(predator.getRunningLength());
                    predator.discardFirstWords(predator.getRunningLength());
                } else {
                    // Run of ones: the AND is the prey itself, padded with
                    // zeros if the prey runs out first.
                    final long index = discharge(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(false, predator.getRunningLength()
                            - index);
                    predator.discardFirstWords(predator.getRunningLength());
                }
            }
            final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
                    rlwj.getNumberOfLiteralWords());
            if (nbre_literal > 0) {
                for (int k = 0; k < nbre_literal; ++k)
                    container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
                rlwi.discardFirstWords(nbre_literal);
                rlwj.discardFirstWords(nbre_literal);
            }
        }
    }

    /**
     * Compute the first few words of the XOR aggregate between two iterators.
     * Both iterators are consumed as words are written. The budget is
     * decreased by one per round and by the number of literal words written,
     * so it is only an approximate bound.
     *
     * @param container where to write
     * @param desiredrlwcount number of words to be written (max)
     * @param rlwi first iterator to aggregate
     * @param rlwj second iterator to aggregate
     */
    public static void xorToContainer(final BitmapStorage container,
            int desiredrlwcount, final IteratingRLW rlwi, final IteratingRLW rlwj) {
        while ((rlwi.size() > 0) && (rlwj.size() > 0) && (desiredrlwcount-- > 0)) {
            while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
                // The predator is the side with the longer (or equal) run.
                final boolean i_is_prey = rlwi.getRunningLength() < rlwj
                        .getRunningLength();
                final IteratingRLW prey = i_is_prey ? rlwi : rlwj;
                final IteratingRLW predator = i_is_prey ? rlwj : rlwi;
                if (predator.getRunningBit() == false) {
                    // XOR with zeros: the prey unchanged, zero-padded.
                    long index = discharge(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(false, predator.getRunningLength()
                            - index);
                    predator.discardFirstWords(predator.getRunningLength());
                } else {
                    // XOR with ones: the prey negated, one-padded.
                    long index = dischargeNegated(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(true, predator.getRunningLength()
                            - index);
                    predator.discardFirstWords(predator.getRunningLength());
                }
            }
            final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
                    rlwj.getNumberOfLiteralWords());
            if (nbre_literal > 0) {
                desiredrlwcount -= nbre_literal;
                for (int k = 0; k < nbre_literal; ++k)
                    container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k));
                rlwi.discardFirstWords(nbre_literal);
                rlwj.discardFirstWords(nbre_literal);
            }
        }
    }

    /**
     * ORs the words of the iterator into the array, starting at index 0, and
     * consumes the iterator accordingly. Stops when the iterator is exhausted
     * or the array is full.
     *
     * @param bitmap uncompressed words to update in place
     * @param i source of data
     * @return index just past the last array word covered
     */
    protected static int inplaceor(long[] bitmap,
            IteratingRLW i) {
        int pos = 0;
        long s;
        while ((s = i.size()) > 0) {
            if (pos + s < bitmap.length) {
                // The whole running length word fits with room to spare.
                final int L = (int) i.getRunningLength();
                if (i.getRunningBit())
                    Arrays.fill(bitmap, pos, pos + L, ~0L);
                pos += L;
                final int LR = i.getNumberOfLiteralWords();
                for (int k = 0; k < LR; ++k)
                    bitmap[pos++] |= i.getLiteralWordAt(k);
                if (!i.next()) {
                    return pos;
                }
            } else {
                // The array fills up within this running length word.
                int howmany = bitmap.length - pos;
                int L = (int) i.getRunningLength();
                if (pos + L > bitmap.length) {
                    // The run alone overflows the array.
                    if (i.getRunningBit()) {
                        Arrays.fill(bitmap, pos, bitmap.length, ~0L);
                    }
                    i.discardFirstWords(howmany);
                    return bitmap.length;
                }
                if (i.getRunningBit())
                    Arrays.fill(bitmap, pos, pos + L, ~0L);
                pos += L;
                for (int k = 0; pos < bitmap.length; ++k)
                    bitmap[pos++] |= i.getLiteralWordAt(k);
                i.discardFirstWords(howmany);
                return pos;
            }
        }
        return pos;
    }

    /**
     * XORs the words of the iterator into the array, starting at index 0, and
     * consumes the iterator accordingly. Stops when the iterator is exhausted
     * or the array is full.
     *
     * @param bitmap uncompressed words to update in place
     * @param i source of data
     * @return index just past the last array word covered
     */
    protected static int inplacexor(long[] bitmap,
            IteratingRLW i) {
        int pos = 0;
        long s;
        while ((s = i.size()) > 0) {
            if (pos + s < bitmap.length) {
                // The whole running length word fits with room to spare.
                final int L = (int) i.getRunningLength();
                if (i.getRunningBit()) {
                    for (int k = pos; k < pos + L; ++k)
                        bitmap[k] = ~bitmap[k];
                }
                pos += L;
                final int LR = i.getNumberOfLiteralWords();
                for (int k = 0; k < LR; ++k)
                    bitmap[pos++] ^= i.getLiteralWordAt(k);
                if (!i.next()) {
                    return pos;
                }
            } else {
                // The array fills up within this running length word.
                int howmany = bitmap.length - pos;
                int L = (int) i.getRunningLength();
                if (pos + L > bitmap.length) {
                    // The run alone overflows the array.
                    if (i.getRunningBit()) {
                        for (int k = pos; k < bitmap.length; ++k)
                            bitmap[k] = ~bitmap[k];
                    }
                    i.discardFirstWords(howmany);
                    return bitmap.length;
                }
                if (i.getRunningBit())
                    for (int k = pos; k < pos + L; ++k)
                        bitmap[k] = ~bitmap[k];
                pos += L;
                for (int k = 0; pos < bitmap.length; ++k)
                    bitmap[pos++] ^= i.getLiteralWordAt(k);
                i.discardFirstWords(howmany);
                return pos;
            }
        }
        return pos;
    }

    /**
     * ANDs the words of the iterator into the array, starting at index 0, and
     * consumes the iterator accordingly. Stops when the iterator is exhausted
     * or the array is full. Array words past the returned index are left
     * untouched.
     *
     * @param bitmap uncompressed words to update in place
     * @param i source of data
     * @return index just past the last array word covered
     */
    protected static int inplaceand(long[] bitmap,
            IteratingRLW i) {
        int pos = 0;
        long s;
        while ((s = i.size()) > 0) {
            if (pos + s < bitmap.length) {
                // The whole running length word fits with room to spare.
                final int L = (int) i.getRunningLength();
                if (!i.getRunningBit()) {
                    Arrays.fill(bitmap, pos, pos + L, 0L);
                }
                pos += L;
                final int LR = i.getNumberOfLiteralWords();
                for (int k = 0; k < LR; ++k)
                    bitmap[pos++] &= i.getLiteralWordAt(k);
                if (!i.next()) {
                    return pos;
                }
            } else {
                // The array fills up within this running length word.
                int howmany = bitmap.length - pos;
                int L = (int) i.getRunningLength();
                if (pos + L > bitmap.length) {
                    // The run alone overflows the array.
                    if (!i.getRunningBit()) {
                        Arrays.fill(bitmap, pos, bitmap.length, 0L);
                    }
                    i.discardFirstWords(howmany);
                    return bitmap.length;
                }
                if (!i.getRunningBit())
                    Arrays.fill(bitmap, pos, pos + L, 0L);
                pos += L;
                for (int k = 0; pos < bitmap.length; ++k)
                    bitmap[pos++] &= i.getLiteralWordAt(k);
                i.discardFirstWords(howmany);
                return pos;
            }
        }
        return pos;
    }

    /**
     * An optimization option. Larger values may improve speed, but at
     * the expense of memory.
     */
    public final static int DEFAULTMAXBUFSIZE = 65536;
}
/**
 * Produces the OR aggregate of a list of iterators chunk by chunk: each call
 * to {@link #next()} ORs up to {@code hardbitmap.length} words of every input
 * into an uncompressed scratch array and returns the result compressed.
 */
// NOTE(review): CloneableIterator is presumably generic over EWAHIterator in
// the real sources (type arguments look stripped from this file); confirm.
class BufferedORIterator implements CloneableIterator {
    /** Compressed result of the latest chunk; reused across calls. */
    EWAHCompressedBitmap buffer = new EWAHCompressedBitmap();
    /** Uncompressed scratch words; reset to zero after each chunk. */
    long[] hardbitmap;
    /** Inputs that still may have data; exhausted ones are removed. */
    LinkedList<IteratingRLW> ll;
    /** Chunk size in words (the length of {@link #hardbitmap}). */
    int buffersize;

    BufferedORIterator(LinkedList<IteratingRLW> basell, int bufsize) {
        this.ll = basell;
        this.buffersize = bufsize;
        this.hardbitmap = new long[bufsize];
    }

    /**
     * Copies this iterator. The buffer and the scratch array are duplicated;
     * the list is copied shallowly, so the copy shares the input iterators
     * with the original.
     */
    @Override
    @SuppressWarnings("unchecked") // LinkedList.clone() returns Object
    public BufferedORIterator clone() throws CloneNotSupportedException {
        // Must cast to this class: the previous cast to BufferedXORIterator
        // could only ever fail with a ClassCastException.
        BufferedORIterator answer = (BufferedORIterator) super.clone();
        answer.buffer = this.buffer.clone();
        answer.hardbitmap = this.hardbitmap.clone();
        answer.ll = (LinkedList<IteratingRLW>) this.ll.clone();
        return answer;
    }

    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Computes the next chunk of the aggregate.
     *
     * @return an iterator over the compressed chunk; it is backed by the
     *         internal buffer, which is cleared by the following call
     */
    @Override
    public EWAHIterator next() {
        this.buffer.clear();
        long effective = 0;
        Iterator<IteratingRLW> i = this.ll.iterator();
        while (i.hasNext()) {
            IteratingRLW rlw = i.next();
            if (rlw.size() > 0) {
                int eff = IteratorAggregation.inplaceor(this.hardbitmap, rlw);
                if (eff > effective)
                    effective = eff;
            } else
                i.remove();
        }
        // Only the words some input actually reached are emitted.
        for (int k = 0; k < effective; ++k) {
            this.buffer.add(this.hardbitmap[k]);
        }
        Arrays.fill(this.hardbitmap, 0);
        return this.buffer.getEWAHIterator();
    }
}
/**
 * Produces the XOR aggregate of a list of iterators chunk by chunk: each call
 * to {@link #next()} XORs up to {@code hardbitmap.length} words of every input
 * into an uncompressed scratch array and returns the result compressed.
 */
// NOTE(review): CloneableIterator is presumably generic over EWAHIterator in
// the real sources (type arguments look stripped from this file); confirm.
class BufferedXORIterator implements CloneableIterator {
    /** Compressed result of the latest chunk; reused across calls. */
    EWAHCompressedBitmap buffer = new EWAHCompressedBitmap();
    /** Uncompressed scratch words; reset to zero after each chunk. */
    long[] hardbitmap;
    /** Inputs that still may have data; exhausted ones are removed. */
    LinkedList<IteratingRLW> ll;
    /** Chunk size in words (the length of {@link #hardbitmap}). */
    int buffersize;

    BufferedXORIterator(LinkedList<IteratingRLW> basell, int bufsize) {
        this.ll = basell;
        this.buffersize = bufsize;
        this.hardbitmap = new long[bufsize];
    }

    /**
     * Copies this iterator. The buffer and the scratch array are duplicated;
     * the list is copied shallowly, so the copy shares the input iterators
     * with the original.
     */
    @Override
    @SuppressWarnings("unchecked") // LinkedList.clone() returns Object
    public BufferedXORIterator clone() throws CloneNotSupportedException {
        BufferedXORIterator answer = (BufferedXORIterator) super.clone();
        answer.buffer = this.buffer.clone();
        answer.hardbitmap = this.hardbitmap.clone();
        answer.ll = (LinkedList<IteratingRLW>) this.ll.clone();
        return answer;
    }

    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Computes the next chunk of the aggregate.
     *
     * @return an iterator over the compressed chunk; it is backed by the
     *         internal buffer, which is cleared by the following call
     */
    @Override
    public EWAHIterator next() {
        this.buffer.clear();
        long effective = 0;
        Iterator<IteratingRLW> i = this.ll.iterator();
        while (i.hasNext()) {
            IteratingRLW rlw = i.next();
            if (rlw.size() > 0) {
                int eff = IteratorAggregation.inplacexor(this.hardbitmap, rlw);
                if (eff > effective)
                    effective = eff;
            } else
                i.remove();
        }
        // Only the words some input actually reached are emitted.
        for (int k = 0; k < effective; ++k)
            this.buffer.add(this.hardbitmap[k]);
        Arrays.fill(this.hardbitmap, 0);
        return this.buffer.getEWAHIterator();
    }
}
/**
 * Produces the AND aggregate of a list of iterators chunk by chunk. The list
 * is expected to hold at least two iterators, since {@link #next()} reads
 * its first two elements unconditionally.
 */
// NOTE(review): CloneableIterator is presumably generic over EWAHIterator in
// the real sources (type arguments look stripped from this file); confirm.
class BufferedAndIterator implements CloneableIterator {
    /** Compressed result of the latest chunk; reused across calls. */
    EWAHCompressedBitmap buffer = new EWAHCompressedBitmap();
    /** Inputs; emptied as soon as any of them is exhausted. */
    LinkedList<IteratingRLW> ll;
    /** Per-input budget handed to the bounded AND of the first two inputs. */
    int buffersize;

    public BufferedAndIterator(LinkedList<IteratingRLW> basell, int bufsize) {
        this.ll = basell;
        this.buffersize = bufsize;
    }

    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Copies this iterator. The buffer is duplicated; the list is copied
     * shallowly, so the copy shares the input iterators with the original.
     */
    @Override
    @SuppressWarnings("unchecked") // LinkedList.clone() returns Object
    public BufferedAndIterator clone() throws CloneNotSupportedException {
        BufferedAndIterator answer = (BufferedAndIterator) super.clone();
        answer.buffer = this.buffer.clone();
        answer.ll = (LinkedList<IteratingRLW>) this.ll.clone();
        return answer;
    }

    /**
     * Computes the next chunk of the aggregate.
     *
     * @return an iterator over the compressed chunk; it is backed by the
     *         internal buffer, which is cleared by the following call
     */
    @Override
    public EWAHIterator next() {
        this.buffer.clear();
        // Bounded AND of the first two inputs determines the chunk.
        IteratorAggregation.andToContainer(this.buffer, this.buffersize * this.ll.size(),
                this.ll.get(0), this.ll.get(1));
        if (this.ll.size() > 2) {
            Iterator<IteratingRLW> i = this.ll.iterator();
            i.next();
            i.next();
            EWAHCompressedBitmap tmpbuffer = new EWAHCompressedBitmap();
            // Fold every remaining input into the chunk, one at a time.
            while (i.hasNext() && this.buffer.sizeInBytes() > 0) {
                IteratorAggregation.andToContainer(tmpbuffer,
                        this.buffer.getIteratingRLW(), i.next());
                this.buffer.swap(tmpbuffer);
                tmpbuffer.clear();
            }
        }
        // Once any input is exhausted, drop all inputs so that hasNext()
        // reports false from now on.
        Iterator<IteratingRLW> i = this.ll.iterator();
        while (i.hasNext()) {
            if (i.next().size() == 0) {
                this.ll.clear();
                break;
            }
        }
        return this.buffer.getEWAHIterator();
    }
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/IteratorUtil.java 0000664 0000000 0000000 00000006606 12240435670 0027255 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
import java.util.Iterator;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* Convenience functions for working over iterators
*
*/
public class IteratorUtil {

    /**
     * @param i iterator we wish to iterate over
     * @return an iterator over the set bits corresponding to the iterator
     */
    public static IntIterator toSetBitsIntIterator(final IteratingRLW i) {
        return new IntIteratorOverIteratingRLW(i);
    }

    /**
     * Boxed counterpart of {@link #toSetBitsIntIterator(IteratingRLW)}.
     * The returned iterator's {@code remove()} does nothing.
     *
     * @param i iterator we wish to iterate over
     * @return an iterator over the set bits corresponding to the iterator
     */
    public static Iterator<Integer> toSetBitsIterator(final IteratingRLW i) {
        return new Iterator<Integer>() {
            @Override
            public boolean hasNext() {
                return this.under.hasNext();
            }

            @Override
            public Integer next() {
                return Integer.valueOf(this.under.next());
            }

            @Override
            public void remove() {
                // Deliberately a no-op: removal is not supported, but callers
                // have never been given an exception here.
            }

            final private IntIterator under = toSetBitsIntIterator(i);
        };
    }

    /**
     * Generate a bitmap from an iterator. The iterator is advanced until its
     * {@code next()} reports that nothing is left.
     *
     * @param i iterator we wish to materialize
     * @param c where we write
     */
    public static void materialize(final IteratingRLW i, final BitmapStorage c) {
        while (true) {
            if (i.getRunningLength() > 0) {
                c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength());
            }
            for (int k = 0; k < i.getNumberOfLiteralWords(); ++k)
                c.add(i.getLiteralWordAt(k));
            if (!i.next())
                break;
        }
    }

    /**
     * Counts the set bits by walking the whole iterator. The count is
     * accumulated in an {@code int}.
     *
     * @param i iterator we wish to iterate over
     * @return the cardinality (number of set bits) corresponding to the iterator
     */
    public static int cardinality(final IteratingRLW i) {
        int answer = 0;
        while (true) {
            // A run of ones contributes a full word of set bits per run word.
            if (i.getRunningBit())
                answer += i.getRunningLength() * EWAHCompressedBitmap.wordinbits;
            for (int k = 0; k < i.getNumberOfLiteralWords(); ++k)
                answer += Long.bitCount(i.getLiteralWordAt(k));
            if (!i.next())
                break;
        }
        return answer;
    }

    /**
     * @param x set of bitmaps
     * @return an array of iterators corresponding to the array of bitmaps
     */
    public static IteratingRLW[] toIterators(final EWAHCompressedBitmap... x) {
        IteratingRLW[] X = new IteratingRLW[x.length];
        for (int k = 0; k < X.length; ++k) {
            X[k] = new IteratingBufferedRunningLengthWord(x[k]);
        }
        return X;
    }

    /**
     * Turn an iterator into a bitmap.
     *
     * NOTE(review): only run (fill) words are capped by and counted against
     * {@code Max}; the literal words of each visited running length word are
     * all written and are not included in the returned count. Confirm whether
     * callers rely on this before changing it.
     *
     * @param i iterator we wish to materialize
     * @param c where we write
     * @param Max maximum number of words we wish to materialize
     * @return how many words were actually materialized
     */
    public static long materialize(final IteratingRLW i, final BitmapStorage c, long Max) {
        final long origMax = Max;
        while (true) {
            if (i.getRunningLength() > 0) {
                long L = i.getRunningLength();
                if (L > Max)
                    L = Max;
                c.addStreamOfEmptyWords(i.getRunningBit(), L);
                Max -= L;
            }
            long L = i.getNumberOfLiteralWords();
            for (int k = 0; k < L; ++k)
                c.add(i.getLiteralWordAt(k));
            if (Max > 0) {
                if (!i.next())
                    break;
            } else
                break;
        }
        return origMax - Max;
    }

    /**
     * Turn an iterator into a bitmap
     *
     * @param i iterator we wish to materialize
     * @return materialized version of the iterator
     */
    public static EWAHCompressedBitmap materialize(final IteratingRLW i) {
        EWAHCompressedBitmap ewah = new EWAHCompressedBitmap();
        materialize(i, ewah);
        return ewah;
    }
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/LogicalElement.java 0000664 0000000 0000000 00000002434 12240435670 0027505 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
/**
 * A prototypical model for bitmaps. Used by the
 * class FastAggregation. Users should probably not
 * be concerned by this class.
 *
 * @author Daniel Lemire
 * @param <T> the type of element (e.g., a bitmap class)
 *
 */
public interface LogicalElement<T> {

    /**
     * Compute the bitwise logical and
     *
     * @param le element
     * @return the result of the operation
     */
    public T and(T le);

    /**
     * Compute the bitwise logical and not
     *
     * @param le element
     * @return the result of the operation
     */
    public T andNot(T le);

    /**
     * Compute the bitwise logical not (in place)
     */
    public void not();

    /**
     * Compute the bitwise logical or
     *
     * @param le another element
     * @return the result of the operation
     */
    // The raw return type is kept as declared so existing implementers stay valid.
    @SuppressWarnings("rawtypes")
    public LogicalElement or(T le);

    /**
     * How many logical bits does this element represent?
     *
     * @return the number of bits represented by this element
     */
    public int sizeInBits();

    /**
     * Should report the storage requirement
     *
     * @return How many bytes
     * @since 0.6.2
     */
    public int sizeInBytes();

    /**
     * Compute the bitwise logical Xor
     *
     * @param le element
     * @return the results of the operation
     */
    public T xor(T le);
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/NonEmptyVirtualStorage.java 0000664 0000000 0000000 00000004455 12240435670 0031273 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * This is a BitmapStorage that can be used to determine quickly if the result
 * of an operation is non-trivial... that is, whether there will be at least
 * one set bit.
 *
 * Nothing is ever stored: each method either returns silently or throws the
 * shared NonEmptyException.
 *
 * @since 0.4.2
 * @author Daniel Lemire and Veronika Zenz
 *
 */
public class NonEmptyVirtualStorage implements BitmapStorage {
    static class NonEmptyException extends RuntimeException {
        private static final long serialVersionUID = 1L;

        /**
         * Skips the stack trace capture for performance reasons.
         *
         * @return this instance
         * @see java.lang.Throwable#fillInStackTrace()
         */
        @Override
        public synchronized Throwable fillInStackTrace() {
            return this;
        }
    }

    /** Single shared instance, thrown by every method that detects content. */
    private static final NonEmptyException nonEmptyException = new NonEmptyException();

    /**
     * Throws a NonEmptyException when the added word is non-zero; a zero
     * word is ignored.
     *
     * @see com.googlecode.javaewah.BitmapStorage#add(long)
     */
    @Override
    public void add(long newdata) {
        if (newdata == 0) {
            return;
        }
        throw nonEmptyException;
    }

    /**
     * Throws a NonEmptyException when number is greater than 0. The
     * content of the words is not inspected.
     */
    @Override
    public void addStreamOfLiteralWords(long[] data, int start, int number) {
        if (number <= 0) {
            return;
        }
        throw nonEmptyException;
    }

    /**
     * Throws a NonEmptyException when the boolean value is true and number
     * is greater than 0; otherwise nothing happens.
     *
     * @see com.googlecode.javaewah.BitmapStorage#addStreamOfEmptyWords(boolean, long)
     */
    @Override
    public void addStreamOfEmptyWords(boolean v, long number) {
        if (!v || number <= 0) {
            return;
        }
        throw nonEmptyException;
    }

    /**
     * Throws a NonEmptyException when number is greater than 0. The
     * content of the words is not inspected.
     */
    @Override
    public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) {
        if (number <= 0) {
            return;
        }
        throw nonEmptyException;
    }

    /**
     * Does nothing.
     *
     * @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int)
     */
    @Override
    public void setSizeInBits(int bits) {
        // intentionally empty: this storage keeps no state
    }
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/RunningLengthWord.java 0000664 0000000 0000000 00000011446 12240435670 0030242 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* Mostly for internal use.
*
* @since 0.1.0
* @author Daniel Lemire
*/
public final class RunningLengthWord implements Cloneable {
/**
 * Creates a view on the running length word found at a given position
 * of a bitmap's buffer.
 *
 * @param a
 *            the bitmap whose buffer holds the running length word
 * @param p
 *            position in the buffer where the running length word is
 *            located.
 */
RunningLengthWord(final EWAHCompressedBitmap a, final int p) {
    this.position = p;
    this.parent = a;
}
/**
 * Reads the literal word count, stored in the bits above the running
 * bit and the running length.
 *
 * @return the number of literal words
 */
public int getNumberOfLiteralWords() {
    final long word = this.parent.buffer[this.position];
    return (int) (word >>> (runninglengthbits + 1));
}
/**
 * Reads the running bit, stored in the lowest bit of the word.
 *
 * @return the running bit
 */
public boolean getRunningBit() {
    final long lowestBit = this.parent.buffer[this.position] & 1;
    return lowestBit != 0;
}
/**
 * Reads the running length, stored just above the running bit.
 *
 * @return the running length
 */
public long getRunningLength() {
    final long word = this.parent.buffer[this.position];
    final long withoutRunningBit = word >>> 1;
    return withoutRunningBit & largestrunninglengthcount;
}
/**
 * Overwrites the literal word count, leaving the running bit and the
 * running length untouched.
 *
 * @param number
 *            the new number of literal words
 */
public void setNumberOfLiteralWords(final long number) {
    // The mask keeps the low (running bit + running length) bits and
    // carries the new count in the high bits.
    final long countAndLowBits = (number << (runninglengthbits + 1))
        | runninglengthplusrunningbit;
    // First raise every count bit, then clear those absent from the mask.
    this.parent.buffer[this.position] |= notrunninglengthplusrunningbit;
    this.parent.buffer[this.position] &= countAndLowBits;
}
/**
 * Sets or clears the running bit (the lowest bit of the word).
 *
 * @param b
 *            the new running bit
 */
public void setRunningBit(final boolean b) {
    if (!b) {
        this.parent.buffer[this.position] &= ~1L;
        return;
    }
    this.parent.buffer[this.position] |= 1L;
}
/**
 * Overwrites the running length, leaving the running bit and the
 * literal word count untouched.
 *
 * @param number
 *            the new running length
 */
public void setRunningLength(final long number) {
    // The mask keeps everything outside the running length field and
    // carries the new length inside it.
    final long lengthAndOtherBits = (number << 1)
        | notshiftedlargestrunninglengthcount;
    // First raise every running length bit, then clear those absent
    // from the mask.
    this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount;
    this.parent.buffer[this.position] &= lengthAndOtherBits;
}
/**
 * Return the size in uncompressed words represented by this running
 * length word: the running length plus the number of literal words.
 *
 * @return the size
 */
public long size() {
    final long cleanWords = getRunningLength();
    final int literalWords = getNumberOfLiteralWords();
    return cleanWords + literalWords;
}
/*
 * Textual summary of the running bit, the running length and the
 * number of literal words.
 *
 * @see java.lang.Object#toString()
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append("running bit = ").append(getRunningBit());
    text.append(" running length = ").append(getRunningLength());
    text.append(" number of lit. words ").append(getNumberOfLiteralWords());
    return text.toString();
}
/**
 * Creates a copy that refers to the same parent bitmap and the same
 * position (the parent itself is not copied).
 *
 * @return the copy
 * @throws CloneNotSupportedException if thrown by Object.clone()
 */
@Override
public RunningLengthWord clone() throws CloneNotSupportedException {
    final RunningLengthWord copy = (RunningLengthWord) super.clone();
    copy.position = this.position;
    copy.parent = this.parent;
    return copy;
}
/** The bitmap whose buffer contains the running length word. */
public EWAHCompressedBitmap parent;
/** The position of the running length word in the parent's buffer. */
public int position;
/**
 * number of bits dedicated to marking of the running length of clean
 * words
 */
public static final int runninglengthbits = 32;
/** number of bits left for the literal word count (64 minus the running bit and the running length bits) */
private static final int literalbits = 64 - 1 - runninglengthbits;
/** largest number of literal words in a run. */
public static final int largestliteralcount = (1 << literalbits) - 1;
/** largest number of clean words in a run */
public static final long largestrunninglengthcount = (1L << runninglengthbits) - 1;
/** mask covering the running bit and the running length bits */
private static final long runninglengthplusrunningbit = (1L << (runninglengthbits + 1)) - 1;
/** mask covering the running length bits only (shifted past the running bit) */
private static final long shiftedlargestrunninglengthcount = largestrunninglengthcount << 1;
/** mask covering the literal word count bits */
private static final long notrunninglengthplusrunningbit = ~runninglengthplusrunningbit;
/** mask covering everything except the running length bits */
private static final long notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount;
} javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/ 0000775 0000000 0000000 00000000000 12240435670 0025705 5 ustar 00root root 0000000 0000000 javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark.java 0000664 0000000 0000000 00000021563 12240435670 0030451 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah.benchmark;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.List;
import com.googlecode.javaewah.EWAHCompressedBitmap;
import com.googlecode.javaewah.FastAggregation;
import com.googlecode.javaewah.IntIterator;
import com.googlecode.javaewah.IteratingRLW;
import com.googlecode.javaewah.IteratorAggregation;
import com.googlecode.javaewah.IteratorUtil;
/**
* This class is used to benchmark the performance of EWAH.
*
* @author Daniel Lemire
*/
public class Benchmark {
/**
 * Compute the union between two sorted arrays.
 *
 * A value present in both arrays is emitted once. When one of the
 * arrays is empty, a copy of the other one is returned.
 *
 * @param set1 first sorted array
 * @param set2 second sorted array
 * @return merged array
 */
static public int[] unite2by2(final int[] set1, final int[] set2) {
    if (set1.length == 0)
        return Arrays.copyOf(set2, set2.length);
    if (set2.length == 0)
        return Arrays.copyOf(set1, set1.length);
    final int[] merged = new int[set1.length + set2.length];
    int i1 = 0;
    int i2 = 0;
    int out = 0;
    // Merge while both inputs still have elements.
    while (i1 < set1.length && i2 < set2.length) {
        final int a = set1[i1];
        final int b = set2[i2];
        if (a < b) {
            merged[out++] = a;
            ++i1;
        } else if (a > b) {
            merged[out++] = b;
            ++i2;
        } else {
            // common value: keep one copy, advance both sides
            merged[out++] = a;
            ++i1;
            ++i2;
        }
    }
    // At most one of the two tails is non-empty; append it.
    final int tail1 = set1.length - i1;
    System.arraycopy(set1, i1, merged, out, tail1);
    out += tail1;
    final int tail2 = set2.length - i2;
    System.arraycopy(set2, i2, merged, out, tail2);
    out += tail2;
    return Arrays.copyOf(merged, out);
}
/**
 * Runs the benchmark with the default configuration.
 *
 * @param args ignored
 */
@SuppressWarnings("javadoc")
public static void main(String[] args) {
    // alternative configuration: test(2, 24, 1);
    test(100, 16, 1);
}
/**
 * Builds N bitmaps from clustered random data and, for each sparsity
 * level, prints one tab-separated line of timings (in seconds) for
 * building, several ways of enumerating the set bits, and several ways
 * of computing the OR and the AND of the first k+1 bitmaps.
 *
 * The iterator-based OR and AND results are cross-checked against
 * EWAHCompressedBitmap.or/and; a RuntimeException("bug") is thrown on a
 * mismatch.
 *
 * @param N number of bitmaps
 * @param nbr each bitmap is generated from 1 &lt;&lt; nbr clustered values
 * @param repeat number of repetitions of each timed section
 */
@SuppressWarnings("javadoc")
public static void test(int N, int nbr, int repeat) {
    DecimalFormat df = new DecimalFormat("0.###");
    ClusteredDataGenerator cdg = new ClusteredDataGenerator();
    for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) {
        long bogus = 0;
        String line = "";
        long bef, aft;
        line += sparsity;
        int[][] data = new int[N][];
        int Max = (1 << (nbr + sparsity));
        System.out.println("# generating random data...");
        // "inter" is merged into every data set so that all of them
        // share some values.
        int[] inter = cdg.generateClustered(1 << (nbr / 2), Max);
        for (int k = 0; k < N; ++k)
            data[k] = unite2by2(cdg.generateClustered(1 << nbr, Max), inter);
        System.out.println("# generating random data... ok.");
        // building
        bef = System.currentTimeMillis();
        EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N];
        int size = 0;
        for (int r = 0; r < repeat; ++r) {
            size = 0;
            for (int k = 0; k < N; ++k) {
                ewah[k] = new EWAHCompressedBitmap();
                for (int x = 0; x < data[k].length; ++x) {
                    ewah[k].set(data[k][x]);
                }
                size += ewah[k].sizeInBytes();
            }
        }
        aft = System.currentTimeMillis();
        line += "\t" + size;
        line += "\t" + df.format((aft - bef) / 1000.0);
        // uncompressing with toArray()
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                int[] array = ewah[k].toArray();
                bogus += array.length;
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // uncompressing with the Java iterator
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                int[] array = new int[ewah[k].cardinality()];
                int c = 0;
                for (int x : ewah[k])
                    array[c++] = x;
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // uncompressing with getPositions()
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                // A typed list is required: a raw List cannot be
                // iterated as int.
                List<Integer> L = ewah[k].getPositions();
                int[] array = new int[L.size()];
                int c = 0;
                for (int x : L)
                    array[c++] = x;
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // uncompressing with intIterator()
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                IntIterator iter = ewah[k].intIterator();
                while (iter.hasNext()) {
                    bogus += iter.next();
                }
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        line += "\t\t\t";
        // logical or (2-by-2)
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                EWAHCompressedBitmap ewahor = ewah[0];
                for (int j = 1; j < k + 1; ++j) {
                    ewahor = ewahor.or(ewah[j]);
                }
                bogus += ewahor.sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // logical or (grouped)
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                EWAHCompressedBitmap[] ewahcp = Arrays.copyOf(ewah, k + 1);
                EWAHCompressedBitmap ewahor = EWAHCompressedBitmap
                    .or(ewahcp);
                bogus += ewahor.sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // FastAggregation.or
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                EWAHCompressedBitmap[] ewahcp = Arrays.copyOf(ewah, k + 1);
                EWAHCompressedBitmap ewahor = FastAggregation.or(ewahcp);
                bogus += ewahor.sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // iterator-based or: run sanity check (not timed)
        for (int k = 0; k < N; ++k) {
            IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
            for (int j = 0; j < k + 1; ++j) {
                ewahcp[j] = ewah[j].getIteratingRLW();
            }
            IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp);
            EWAHCompressedBitmap ewahorp = EWAHCompressedBitmap.or(Arrays.copyOf(ewah, k + 1));
            EWAHCompressedBitmap mewahor = IteratorUtil.materialize(ewahor);
            if (!ewahorp.equals(mewahor))
                throw new RuntimeException("bug");
        }
        // iterator-based or
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
                for (int j = 0; j < k + 1; ++j) {
                    ewahcp[j] = ewah[j].getIteratingRLW();
                }
                IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp);
                bogus += IteratorUtil.materialize(ewahor).sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        line += "\t\t\t";
        // logical and (2-by-2)
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                EWAHCompressedBitmap ewahand = ewah[0];
                for (int j = 1; j < k + 1; ++j) {
                    ewahand = ewahand.and(ewah[j]);
                }
                bogus += ewahand.sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // logical and (grouped)
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                EWAHCompressedBitmap[] ewahcp = Arrays.copyOf(ewah, k + 1);
                EWAHCompressedBitmap ewahand = EWAHCompressedBitmap
                    .and(ewahcp);
                bogus += ewahand.sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // iterator-based and: run sanity check (not timed)
        for (int k = 0; k < N; ++k) {
            IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
            for (int j = 0; j < k + 1; ++j) {
                ewahcp[j] = ewah[j].getIteratingRLW();
            }
            IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp);
            EWAHCompressedBitmap ewahandp = EWAHCompressedBitmap.and(Arrays.copyOf(ewah, k + 1));
            EWAHCompressedBitmap mewahand = IteratorUtil.materialize(ewahand);
            if (!ewahandp.equals(mewahand))
                throw new RuntimeException("bug");
        }
        // iterator-based and
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
                for (int j = 0; j < k + 1; ++j) {
                    ewahcp[j] = ewah[j].getIteratingRLW();
                }
                IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp);
                bogus += IteratorUtil.materialize(ewahand).sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // The header names the getPositions() column so that the labels
        // after it line up with the timings.
        System.out
            .println("time for building, toArray(), Java iterator, getPositions(), intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and");
        System.out.println(line);
        System.out.println("# bogus =" + bogus);
    }
}
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/Benchmark32.java 0000664 0000000 0000000 00000015446 12240435670 0030621 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah.benchmark;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
import java.text.DecimalFormat;
import java.util.List;
import com.googlecode.javaewah32.EWAHCompressedBitmap32;
import com.googlecode.javaewah.FastAggregation;
import com.googlecode.javaewah.IntIterator;
import com.googlecode.javaewah32.IteratingRLW32;
import com.googlecode.javaewah32.IteratorAggregation32;
import com.googlecode.javaewah32.IteratorUtil32;
/**
* This class is used to benchmark the performance of EWAH.
*
* @author Daniel Lemire
*/
public class Benchmark32 {
/**
 * Runs the 32-bit benchmark with the default configuration.
 *
 * @param args ignored
 */
@SuppressWarnings("javadoc")
public static void main(String[] args) {
    test(100, 16, 1);
    // alternative configuration: test(2, 24, 1);
}
/**
 * Builds N 32-bit bitmaps from clustered random data and, for each
 * sparsity level, prints one tab-separated line of timings (in seconds)
 * for building, several ways of enumerating the set bits, and several
 * ways of computing the OR and the AND of the first k+1 bitmaps.
 *
 * @param N number of bitmaps
 * @param nbr each bitmap is generated from 1 &lt;&lt; nbr clustered values
 * @param repeat number of repetitions of each timed section
 */
@SuppressWarnings("javadoc")
public static void test(int N, int nbr, int repeat) {
    DecimalFormat df = new DecimalFormat("0.###");
    ClusteredDataGenerator cdg = new ClusteredDataGenerator();
    for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) {
        long bogus = 0;
        String line = "";
        long bef, aft;
        line += sparsity;
        int[][] data = new int[N][];
        int Max = (1 << (nbr + sparsity));
        System.out.println("# generating random data...");
        // "inter" is merged into every data set so that all of them
        // share some values.
        int[] inter = cdg.generateClustered(1 << (nbr / 2), Max);
        for (int k = 0; k < N; ++k)
            data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max), inter);
        System.out.println("# generating random data... ok.");
        // building
        bef = System.currentTimeMillis();
        EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N];
        int size = 0;
        for (int r = 0; r < repeat; ++r) {
            size = 0;
            for (int k = 0; k < N; ++k) {
                ewah[k] = new EWAHCompressedBitmap32();
                for (int x = 0; x < data[k].length; ++x) {
                    ewah[k].set(data[k][x]);
                }
                size += ewah[k].sizeInBytes();
            }
        }
        aft = System.currentTimeMillis();
        line += "\t" + size;
        line += "\t" + df.format((aft - bef) / 1000.0);
        // uncompressing with toArray()
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                int[] array = ewah[k].toArray();
                bogus += array.length;
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // uncompressing with the Java iterator
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                int[] array = new int[ewah[k].cardinality()];
                int c = 0;
                for (int x : ewah[k])
                    array[c++] = x;
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // uncompressing with getPositions()
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                // A typed list is required: a raw List cannot be
                // iterated as int.
                List<Integer> L = ewah[k].getPositions();
                int[] array = new int[L.size()];
                int c = 0;
                for (int x : L)
                    array[c++] = x;
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // uncompressing with intIterator()
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                IntIterator iter = ewah[k].intIterator();
                while (iter.hasNext()) {
                    bogus += iter.next();
                }
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        line += "\t\t\t";
        // logical or (2-by-2)
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                EWAHCompressedBitmap32 ewahor = ewah[0];
                for (int j = 1; j < k + 1; ++j) {
                    ewahor = ewahor.or(ewah[j]);
                }
                bogus += ewahor.sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // logical or (grouped)
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
                for (int j = 0; j < k + 1; ++j) {
                    ewahcp[j] = ewah[j];
                }
                EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32
                    .or(ewahcp);
                bogus += ewahor.sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // FastAggregation.or
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
                for (int j = 0; j < k + 1; ++j) {
                    ewahcp[j] = ewah[j];
                }
                EWAHCompressedBitmap32 ewahor = FastAggregation.or(ewahcp);
                bogus += ewahor.sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // iterator-based or
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1];
                for (int j = 0; j < k + 1; ++j) {
                    ewahcp[j] = ewah[j].getIteratingRLW();
                }
                IteratingRLW32 ewahor = IteratorAggregation32.bufferedor(ewahcp);
                bogus += IteratorUtil32.materialize(ewahor).sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        line += "\t\t\t";
        // logical and (2-by-2)
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                EWAHCompressedBitmap32 ewahand = ewah[0];
                for (int j = 1; j < k + 1; ++j) {
                    ewahand = ewahand.and(ewah[j]);
                }
                bogus += ewahand.sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // logical and (grouped)
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
                for (int j = 0; j < k + 1; ++j) {
                    ewahcp[j] = ewah[j];
                }
                EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32
                    .and(ewahcp);
                bogus += ewahand.sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // iterator-based and
        bef = System.currentTimeMillis();
        for (int r = 0; r < repeat; ++r)
            for (int k = 0; k < N; ++k) {
                IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1];
                for (int j = 0; j < k + 1; ++j) {
                    ewahcp[j] = ewah[j].getIteratingRLW();
                }
                IteratingRLW32 ewahand = IteratorAggregation32.bufferedand(ewahcp);
                bogus += IteratorUtil32.materialize(ewahand).sizeInBits();
            }
        aft = System.currentTimeMillis();
        line += "\t" + df.format((aft - bef) / 1000.0);
        // The header names the getPositions() column so that the labels
        // after it line up with the timings.
        System.out
            .println("time for building, toArray(), Java iterator, getPositions(), intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and");
        System.out.println(line);
        System.out.println("# bogus =" + bogus);
    }
}
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection.java 0000664 0000000 0000000 00000007744 12240435670 0033045 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.googlecode.javaewah.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical and (intersection) aggregate.
*/
public class BenchmarkIntersection {
/**
 * Runs the intersection benchmark with the default configuration.
 *
 * @param args ignored
 */
@SuppressWarnings("javadoc")
public static void main(String[] args) {
    test(10, 18, 1);
}
/**
 * Times four ways of intersecting the first k+1 of N bitmaps built from
 * clustered random data: pairwise and, EWAHCompressedBitmap.and,
 * FastAggregation.bufferedand and the iterator-based bufferedand. Each
 * sparsity level is measured twice and one tab-separated line of
 * timings (in seconds) is printed per measurement.
 *
 * @param N number of bitmaps
 * @param nbr each bitmap is generated from 1 &lt;&lt; nbr clustered values
 * @param repeat number of repetitions of each timed section
 */
@SuppressWarnings({ "javadoc"})
public static void test(int N, int nbr, int repeat) {
    long bogus = 0;
    DecimalFormat df = new DecimalFormat("0.###");
    ClusteredDataGenerator cdg = new ClusteredDataGenerator();
    for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
        for (int times = 0; times < 2; ++times) {
            String line = "";
            long bef, aft;
            line += sparsity;
            int[][] data = new int[N][];
            int Max = (1 << (nbr + sparsity));
            // "inter" is merged into every data set so that all of
            // them share some values.
            int[] inter = cdg.generateClustered(1 << (nbr / 2), Max);
            for (int k = 0; k < N; ++k)
                data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max), inter);
            // building
            EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N];
            for (int k = 0; k < N; ++k) {
                ewah[k] = new EWAHCompressedBitmap();
                for (int x = 0; x < data[k].length; ++x) {
                    ewah[k].set(data[k][x]);
                }
                data[k] = null; // release the raw data early
            }
            // sanity check; it needs at least two bitmaps, otherwise
            // ewah[1] would be out of bounds
            if (N >= 2) {
                EWAHCompressedBitmap answer = ewah[0].and(ewah[1]);
                for (int k = 2; k < ewah.length; ++k)
                    answer = answer.and(ewah[k]);
                EWAHCompressedBitmap ewahand = EWAHCompressedBitmap.and(ewah);
                if (!answer.equals(ewahand))
                    throw new RuntimeException(
                        "bug EWAHCompressedBitmap.and");
                EWAHCompressedBitmap ewahand2 = FastAggregation
                    .bufferedand(65536, ewah);
                if (!ewahand.equals(ewahand2))
                    throw new RuntimeException(
                        "bug FastAggregation.bufferedand ");
            }
            // logical and (2-by-2)
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap ewahand = ewah[0];
                    for (int j = 1; j < k + 1; ++j) {
                        ewahand = ewahand.and(ewah[j]);
                    }
                    // consume the result, as the other timed sections
                    // do, so that the work cannot be optimized away
                    bogus += ewahand.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // EWAHCompressedBitmap.and
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j];
                    }
                    EWAHCompressedBitmap ewahand = EWAHCompressedBitmap
                        .and(ewahcp);
                    bogus += ewahand.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // FastAggregation.bufferedand
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j];
                    }
                    EWAHCompressedBitmap ewahand = FastAggregation
                        .bufferedand(65536, ewahcp);
                    bogus += ewahand.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // iterator-based bufferedand
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = new IteratingBufferedRunningLengthWord(
                            ewah[j]);
                    }
                    IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp);
                    int wordcounter = IteratorUtil.cardinality(ewahand);
                    bogus += wordcounter;
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            System.out
                .println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand");
            System.out.println(line);
        }
        System.out.println("# bogus =" + bogus);
    }
}
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkIntersection32.java0000664 0000000 0000000 00000010027 12240435670 0033176 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.googlecode.javaewah32.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical and (intersection) aggregate.
*/
public class BenchmarkIntersection32 {
/**
 * Runs the 32-bit intersection benchmark with the default configuration.
 *
 * @param args ignored
 */
@SuppressWarnings("javadoc")
public static void main(String[] args) {
    test(10, 18, 1);
}
/**
 * Times four ways of intersecting the first k+1 of N 32-bit bitmaps
 * built from clustered random data: pairwise and,
 * EWAHCompressedBitmap32.and, FastAggregation32.bufferedand and the
 * iterator-based bufferedand. Each sparsity level is measured twice and
 * one tab-separated line of timings (in seconds) is printed per
 * measurement.
 *
 * @param N number of bitmaps
 * @param nbr each bitmap is generated from 1 &lt;&lt; nbr clustered values
 * @param repeat number of repetitions of each timed section
 */
@SuppressWarnings({ "javadoc" })
public static void test(int N, int nbr, int repeat) {
    long bogus = 0;
    DecimalFormat df = new DecimalFormat("0.###");
    ClusteredDataGenerator cdg = new ClusteredDataGenerator();
    for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
        for (int times = 0; times < 2; ++times) {
            String line = "";
            long bef, aft;
            line += sparsity;
            int[][] data = new int[N][];
            int Max = (1 << (nbr + sparsity));
            // "inter" is merged into every data set so that all of
            // them share some values.
            int[] inter = cdg.generateClustered(1 << (nbr / 2), Max);
            for (int k = 0; k < N; ++k)
                data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max), inter);
            // building
            EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N];
            for (int k = 0; k < N; ++k) {
                ewah[k] = new EWAHCompressedBitmap32();
                for (int x = 0; x < data[k].length; ++x) {
                    ewah[k].set(data[k][x]);
                }
                data[k] = null; // release the raw data early
            }
            // sanity check; it needs at least two bitmaps, otherwise
            // ewah[1] would be out of bounds
            if (N >= 2) {
                EWAHCompressedBitmap32 answer = ewah[0].and(ewah[1]);
                for (int k = 2; k < ewah.length; ++k)
                    answer = answer.and(ewah[k]);
                EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32.and(ewah);
                if (!answer.equals(ewahand))
                    throw new RuntimeException(
                        "bug EWAHCompressedBitmap32.and");
                EWAHCompressedBitmap32 ewahand2 = FastAggregation32
                    .bufferedand(65536, ewah);
                if (!ewahand.equals(ewahand2))
                    throw new RuntimeException(
                        "bug FastAggregation32.bufferedand");
            }
            // logical and (2-by-2)
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap32 ewahand = ewah[0];
                    for (int j = 1; j < k + 1; ++j) {
                        ewahand = ewahand.and(ewah[j]);
                    }
                    // consume the result, as the other timed sections
                    // do, so that the work cannot be optimized away
                    bogus += ewahand.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // EWAHCompressedBitmap32.and
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j];
                    }
                    EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32
                        .and(ewahcp);
                    bogus += ewahand.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // FastAggregation32.bufferedand
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j];
                    }
                    EWAHCompressedBitmap32 ewahand = FastAggregation32
                        .bufferedand(65536, ewahcp);
                    bogus += ewahand.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // iterator-based bufferedand
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = new IteratingBufferedRunningLengthWord32(
                            ewah[j]);
                    }
                    IteratingRLW32 ewahand = IteratorAggregation32.bufferedand(ewahcp);
                    int wordcounter = IteratorUtil32.cardinality(ewahand);
                    bogus += wordcounter;
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            System.out
                .println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand");
            System.out.println(line);
        }
        System.out.println("# bogus =" + bogus);
    }
}
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion.java 0000664 0000000 0000000 00000012077 12240435670 0031462 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.googlecode.javaewah.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical or (union) aggregate.
*/
public class BenchmarkUnion {
/**
 * Runs the union benchmark with the default configuration.
 *
 * @param args ignored
 */
@SuppressWarnings("javadoc")
public static void main(String[] args) {
    test(10, 18, 1);
}
/**
 * Times six ways of computing the union of the first k+1 of N bitmaps
 * built from clustered random data: pairwise or,
 * EWAHCompressedBitmap.or, FastAggregation.or,
 * FastAggregation.bufferedor, FastAggregation.legacy_orWithContainer
 * and the iterator-based bufferedor. Each sparsity level is measured
 * twice and one tab-separated line of timings (in seconds) is printed
 * per measurement.
 *
 * @param N number of bitmaps
 * @param nbr each bitmap is generated from 1 &lt;&lt; nbr clustered values
 * @param repeat number of repetitions of each timed section
 */
@SuppressWarnings({ "javadoc", "deprecation" })
public static void test(int N, int nbr, int repeat) {
    long bogus = 0;
    DecimalFormat df = new DecimalFormat("0.###");
    ClusteredDataGenerator cdg = new ClusteredDataGenerator();
    for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
        for (int times = 0; times < 2; ++times) {
            String line = "";
            long bef, aft;
            line += sparsity;
            int[][] data = new int[N][];
            int Max = (1 << (nbr + sparsity));
            for (int k = 0; k < N; ++k)
                data[k] = cdg.generateClustered(1 << nbr, Max);
            // building
            EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N];
            for (int k = 0; k < N; ++k) {
                ewah[k] = new EWAHCompressedBitmap();
                for (int x = 0; x < data[k].length; ++x) {
                    ewah[k].set(data[k][x]);
                }
                data[k] = null; // release the raw data early
            }
            // sanity check; it needs at least two bitmaps, otherwise
            // ewah[1] would be out of bounds
            if (N >= 2) {
                EWAHCompressedBitmap answer = ewah[0].or(ewah[1]);
                for (int k = 2; k < ewah.length; ++k)
                    answer = answer.or(ewah[k]);
                EWAHCompressedBitmap ewahor = EWAHCompressedBitmap.or(ewah);
                if (!answer.equals(ewahor))
                    throw new RuntimeException(
                        "bug EWAHCompressedBitmap.or");
                EWAHCompressedBitmap ewahor3 = FastAggregation.or(ewah);
                if (!ewahor.equals(ewahor3))
                    throw new RuntimeException("bug FastAggregation.or");
                EWAHCompressedBitmap ewahor2 = FastAggregation
                    .bufferedor(65536, ewah);
                if (!ewahor.equals(ewahor2))
                    throw new RuntimeException(
                        "bug FastAggregation.bufferedor ");
            }
            // logical or (2-by-2)
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap ewahor = ewah[0];
                    for (int j = 1; j < k + 1; ++j) {
                        ewahor = ewahor.or(ewah[j]);
                    }
                    // consume the result, as the other timed sections
                    // do, so that the work cannot be optimized away
                    bogus += ewahor.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // EWAHCompressedBitmap.or
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j];
                    }
                    EWAHCompressedBitmap ewahor = EWAHCompressedBitmap
                        .or(ewahcp);
                    bogus += ewahor.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // FastAggregation.or
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j];
                    }
                    EWAHCompressedBitmap ewahor = FastAggregation
                        .or(ewahcp);
                    bogus += ewahor.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // FastAggregation.bufferedor
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j];
                    }
                    EWAHCompressedBitmap ewahor = FastAggregation
                        .bufferedor(65536, ewahcp);
                    bogus += ewahor.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // FastAggregation.legacy_orWithContainer
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j];
                    }
                    EWAHCompressedBitmap x = new EWAHCompressedBitmap();
                    FastAggregation.legacy_orWithContainer(x, ewahcp);
                    bogus += x.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // iterator-based bufferedor
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = new IteratingBufferedRunningLengthWord(
                            ewah[j]);
                    }
                    IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp);
                    int wordcounter = IteratorUtil.cardinality(ewahor);
                    bogus += wordcounter;
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // One label per timing above; the stale "experimentalor"
            // label had no matching measurement.
            System.out
                .println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or bufferedor legacygroupedor iterator-bufferedor");
            System.out.println(line);
        }
        System.out.println("# bogus =" + bogus);
    }
}
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkUnion32.java 0000664 0000000 0000000 00000012321 12240435670 0031617 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.googlecode.javaewah.FastAggregation;
import com.googlecode.javaewah32.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical or (union) aggregate.
*/
public class BenchmarkUnion32 {
    /**
     * Runs the benchmark with its default settings: 10 bitmaps holding
     * 2^18 values each, every timed section executed once.
     *
     * @param args command-line arguments (unused)
     */
    @SuppressWarnings("javadoc")
    public static void main(String args[]) {
        test(10, 18, 1);
    }

    /**
     * Times several ways of computing the union of N clustered 32-bit EWAH
     * bitmaps. For every sparsity level from 1 up to (but excluding)
     * {@code 30 - nbr} the experiment is run twice; each run prints a header
     * line followed by one tab-separated line holding the sparsity and six
     * timings in seconds, in this order: pairwise {@code or},
     * {@code EWAHCompressedBitmap32.or}, {@code FastAggregation.or},
     * {@code FastAggregation32.bufferedor},
     * {@code FastAggregation32.legacy_orWithContainer} and the
     * iterator-based {@code IteratorAggregation32.bufferedor}.
     *
     * Every timed section computes the union of the first k+1 bitmaps for
     * each k in [0, N).
     *
     * @param N number of bitmaps to generate; the sanity check reads the
     *          first two bitmaps, so N must be at least 2
     * @param nbr base-2 logarithm of the number of values set in each bitmap
     * @param repeat number of times each timed section is repeated
     */
    @SuppressWarnings({ "javadoc", "deprecation" })
    public static void test(int N, int nbr, int repeat) {
        long bogus = 0;
        DecimalFormat df = new DecimalFormat("0.###");
        ClusteredDataGenerator cdg = new ClusteredDataGenerator();
        for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
            for (int times = 0; times < 2; ++times) {
                StringBuilder line = new StringBuilder();
                long bef, aft;
                line.append(sparsity);
                EWAHCompressedBitmap32[] ewah = buildBitmaps(cdg, N, nbr, sparsity);
                // fail fast if the aggregation strategies disagree
                sanityCheck(ewah);

                // column 1: logical or, two bitmaps at a time
                bef = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        EWAHCompressedBitmap32 ewahor = ewah[0];
                        for (int j = 1; j < k + 1; ++j) {
                            ewahor = ewahor.or(ewah[j]);
                        }
                    }
                }
                aft = System.currentTimeMillis();
                line.append(seconds(df, bef, aft));

                // column 2: EWAHCompressedBitmap32.or over the whole array
                bef = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        EWAHCompressedBitmap32[] ewahcp = prefix(ewah, k + 1);
                        EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32.or(ewahcp);
                        bogus += ewahor.sizeInBits();
                    }
                }
                aft = System.currentTimeMillis();
                line.append(seconds(df, bef, aft));

                // column 3: FastAggregation.or
                bef = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        EWAHCompressedBitmap32[] ewahcp = prefix(ewah, k + 1);
                        EWAHCompressedBitmap32 ewahor = FastAggregation.or(ewahcp);
                        bogus += ewahor.sizeInBits();
                    }
                }
                aft = System.currentTimeMillis();
                line.append(seconds(df, bef, aft));

                // column 4: FastAggregation32.bufferedor
                bef = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        EWAHCompressedBitmap32[] ewahcp = prefix(ewah, k + 1);
                        EWAHCompressedBitmap32 ewahor = FastAggregation32
                                .bufferedor(65536, ewahcp);
                        bogus += ewahor.sizeInBits();
                    }
                }
                aft = System.currentTimeMillis();
                line.append(seconds(df, bef, aft));

                // column 5: legacy container-based or (deprecated API)
                bef = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        EWAHCompressedBitmap32[] ewahcp = prefix(ewah, k + 1);
                        EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32();
                        FastAggregation32.legacy_orWithContainer(x, ewahcp);
                        bogus += x.sizeInBits();
                    }
                }
                aft = System.currentTimeMillis();
                line.append(seconds(df, bef, aft));

                // column 6: iterator-based buffered or
                bef = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1];
                        for (int j = 0; j < k + 1; ++j) {
                            ewahcp[j] = new IteratingBufferedRunningLengthWord32(ewah[j]);
                        }
                        IteratingRLW32 ewahor = IteratorAggregation32
                                .bufferedor(ewahcp);
                        int wordcounter = IteratorUtil32.cardinality(ewahor);
                        bogus += wordcounter;
                    }
                }
                aft = System.currentTimeMillis();
                line.append(seconds(df, bef, aft));

                // The header used to list a seventh label ("experimentalor")
                // that had no matching timing, which shifted every later
                // label by one column; it now names exactly the six timings.
                System.out
                        .println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or bufferedor legacygroupedor iterator-bufferedor");
                System.out.println(line.toString());
            }
            // printing the accumulated value keeps the timed results observable
            System.out.println("# bogus =" + bogus);
        }
    }

    /**
     * Generates N clustered integer arrays of 2^nbr values drawn from
     * [0, 2^(nbr + sparsity)) and loads each one into its own bitmap. All
     * arrays are generated before the first bitmap is built.
     */
    private static EWAHCompressedBitmap32[] buildBitmaps(ClusteredDataGenerator cdg,
            int N, int nbr, int sparsity) {
        int[][] data = new int[N][];
        int Max = (1 << (nbr + sparsity));
        for (int k = 0; k < N; ++k) {
            data[k] = cdg.generateClustered(1 << nbr, Max);
        }
        EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N];
        for (int k = 0; k < N; ++k) {
            ewah[k] = new EWAHCompressedBitmap32();
            for (int x = 0; x < data[k].length; ++x) {
                ewah[k].set(data[k][x]);
            }
            // release the raw values as soon as they are encoded
            data[k] = null;
        }
        return ewah;
    }

    /**
     * Checks that the aggregation strategies agree with the pairwise union
     * and throws a RuntimeException naming the first one that does not.
     */
    @SuppressWarnings("deprecation")
    private static void sanityCheck(EWAHCompressedBitmap32[] ewah) {
        EWAHCompressedBitmap32 answer = ewah[0].or(ewah[1]);
        for (int k = 2; k < ewah.length; ++k) {
            answer = answer.or(ewah[k]);
        }
        EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32.or(ewah);
        if (!answer.equals(ewahor)) {
            throw new RuntimeException("bug EWAHCompressedBitmap.or");
        }
        EWAHCompressedBitmap32 ewahor3 = FastAggregation.or(ewah);
        if (!ewahor.equals(ewahor3)) {
            throw new RuntimeException("bug FastAggregation.or");
        }
        EWAHCompressedBitmap32 ewahor2 = FastAggregation32.bufferedor(65536, ewah);
        if (!ewahor.equals(ewahor2)) {
            throw new RuntimeException("bug FastAggregation.bufferedor ");
        }
    }

    /** Returns a new array holding the first {@code count} bitmaps of {@code ewah}. */
    private static EWAHCompressedBitmap32[] prefix(EWAHCompressedBitmap32[] ewah, int count) {
        EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[count];
        for (int j = 0; j < count; ++j) {
            ewahcp[j] = ewah[j];
        }
        return ewahcp;
    }

    /** Formats the elapsed time between two millisecond stamps as a tab-prefixed seconds column. */
    private static String seconds(DecimalFormat df, long bef, long aft) {
        return "\t" + df.format((aft - bef) / 1000.0);
    }
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR.java 0000664 0000000 0000000 00000010103 12240435670 0031026 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.googlecode.javaewah.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical xor aggregate.
*/
public class BenchmarkXOR {
    /**
     * Entry point: benchmarks the xor of 2 bitmaps of 2^22 values each,
     * running every timed section once.
     *
     * @param args command-line arguments (unused)
     */
    @SuppressWarnings("javadoc")
    public static void main(String args[]) {
        // alternative configuration: test(10, 18, 1);
        test(2, 22, 1);
    }

    /**
     * Measures four ways of xor-ing the first k+1 of N clustered bitmaps
     * (for each k in [0, N)): pairwise xor, FastAggregation.xor,
     * FastAggregation.bufferedxor and the iterator-based bufferedxor. For
     * each sparsity level below {@code 30 - nbr} the experiment runs twice
     * and prints a header plus a tab-separated line of timings in seconds.
     *
     * @param N number of bitmaps; the sanity check reads the first two
     * @param nbr base-2 logarithm of the number of values per bitmap
     * @param repeat number of repetitions of every timed section
     */
    @SuppressWarnings({ "javadoc" })
    public static void test(int N, int nbr, int repeat) {
        long bogus = 0;
        final DecimalFormat secondsFormat = new DecimalFormat("0.###");
        final ClusteredDataGenerator generator = new ClusteredDataGenerator();
        for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
            for (int times = 0; times < 2; ++times) {
                final StringBuilder report = new StringBuilder();
                report.append(sparsity);

                // generate every data set before encoding any of them
                final int universe = 1 << (nbr + sparsity);
                final int[][] values = new int[N][];
                for (int k = 0; k < N; ++k) {
                    values[k] = generator.generateClustered(1 << nbr, universe);
                }
                final EWAHCompressedBitmap[] bitmaps = new EWAHCompressedBitmap[N];
                for (int k = 0; k < N; ++k) {
                    bitmaps[k] = new EWAHCompressedBitmap();
                    for (int value : values[k]) {
                        bitmaps[k].set(value);
                    }
                    values[k] = null;
                }

                verifyAggregates(bitmaps);

                // pairwise xor
                long start = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        EWAHCompressedBitmap running = bitmaps[0];
                        for (int j = 1; j <= k; ++j) {
                            running = running.xor(bitmaps[j]);
                        }
                    }
                }
                long stop = System.currentTimeMillis();
                report.append(elapsedColumn(secondsFormat, start, stop));

                // FastAggregation.xor
                start = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        final EWAHCompressedBitmap[] head = firstBitmaps(bitmaps, k + 1);
                        final EWAHCompressedBitmap combined = FastAggregation.xor(head);
                        bogus += combined.sizeInBits();
                    }
                }
                stop = System.currentTimeMillis();
                report.append(elapsedColumn(secondsFormat, start, stop));

                // FastAggregation.bufferedxor
                start = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        final EWAHCompressedBitmap[] head = firstBitmaps(bitmaps, k + 1);
                        final EWAHCompressedBitmap combined = FastAggregation
                                .bufferedxor(65536, head);
                        bogus += combined.sizeInBits();
                    }
                }
                stop = System.currentTimeMillis();
                report.append(elapsedColumn(secondsFormat, start, stop));

                // iterator-based bufferedxor
                start = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        final IteratingRLW[] cursors = new IteratingRLW[k + 1];
                        for (int j = 0; j <= k; ++j) {
                            cursors[j] = new IteratingBufferedRunningLengthWord(bitmaps[j]);
                        }
                        final IteratingRLW combined = IteratorAggregation.bufferedxor(cursors);
                        final int wordcounter = IteratorUtil.cardinality(combined);
                        bogus += wordcounter;
                    }
                }
                stop = System.currentTimeMillis();
                report.append(elapsedColumn(secondsFormat, start, stop));

                System.out
                        .println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based");
                System.out.println(report.toString());
            }
            System.out.println("# bogus =" + bogus);
        }
    }

    /**
     * Compares each aggregation strategy against the pairwise xor of all
     * bitmaps and throws a RuntimeException naming the first mismatch.
     */
    private static void verifyAggregates(EWAHCompressedBitmap[] bitmaps) {
        EWAHCompressedBitmap expected = bitmaps[0].xor(bitmaps[1]);
        for (int k = 2; k < bitmaps.length; ++k) {
            expected = expected.xor(bitmaps[k]);
        }
        final EWAHCompressedBitmap aggregated = FastAggregation.xor(bitmaps);
        if (!expected.equals(aggregated)) {
            throw new RuntimeException("bug FastAggregation.xor");
        }
        final EWAHCompressedBitmap buffered = FastAggregation.bufferedxor(65536, bitmaps);
        if (!expected.equals(buffered)) {
            throw new RuntimeException("bug FastAggregation.bufferedxor ");
        }
        final EWAHCompressedBitmap viaIterators = IteratorUtil.materialize(
                IteratorAggregation.bufferedxor(IteratorUtil.toIterators(bitmaps)));
        if (!expected.equals(viaIterators)) {
            throw new RuntimeException("bug xor it ");
        }
    }

    /** Copies the first {@code count} bitmaps into a fresh array. */
    private static EWAHCompressedBitmap[] firstBitmaps(EWAHCompressedBitmap[] all, int count) {
        final EWAHCompressedBitmap[] head = new EWAHCompressedBitmap[count];
        System.arraycopy(all, 0, head, 0, count);
        return head;
    }

    /** Renders the span between two millisecond stamps as a tab-prefixed number of seconds. */
    private static String elapsedColumn(DecimalFormat format, long start, long stop) {
        return "\t" + format.format((stop - start) / 1000.0);
    }
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/BenchmarkXOR32.java 0000664 0000000 0000000 00000010252 12240435670 0031200 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.googlecode.javaewah.FastAggregation;
import com.googlecode.javaewah32.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical xor aggregate.
*/
public class BenchmarkXOR32 {
    /**
     * Entry point: benchmarks the xor of 10 bitmaps of 2^18 values each,
     * running every timed section once.
     *
     * @param args command-line arguments (unused)
     */
    @SuppressWarnings("javadoc")
    public static void main(String args[]) {
        test(10, 18, 1);
        // alternative configuration: test(2, 22, 1);
    }

    /**
     * Measures four ways of xor-ing the first k+1 of N clustered 32-bit
     * bitmaps (for each k in [0, N)): pairwise xor, FastAggregation.xor,
     * FastAggregation32.bufferedxor and the iterator-based bufferedxor. For
     * each sparsity level below {@code 30 - nbr} the experiment runs twice
     * and prints a header plus a tab-separated line of timings in seconds.
     *
     * @param N number of bitmaps; the sanity check reads the first two
     * @param nbr base-2 logarithm of the number of values per bitmap
     * @param repeat number of repetitions of every timed section
     */
    @SuppressWarnings({ "javadoc" })
    public static void test(int N, int nbr, int repeat) {
        long bogus = 0;
        final DecimalFormat secondsFormat = new DecimalFormat("0.###");
        final ClusteredDataGenerator generator = new ClusteredDataGenerator();
        for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
            for (int times = 0; times < 2; ++times) {
                final StringBuilder report = new StringBuilder();
                report.append(sparsity);

                // generate every data set before encoding any of them
                final int universe = 1 << (nbr + sparsity);
                final int[][] values = new int[N][];
                for (int k = 0; k < N; ++k) {
                    values[k] = generator.generateClustered(1 << nbr, universe);
                }
                final EWAHCompressedBitmap32[] bitmaps = new EWAHCompressedBitmap32[N];
                for (int k = 0; k < N; ++k) {
                    bitmaps[k] = new EWAHCompressedBitmap32();
                    for (int value : values[k]) {
                        bitmaps[k].set(value);
                    }
                    values[k] = null;
                }

                verifyAggregates(bitmaps);

                // pairwise xor
                long start = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        EWAHCompressedBitmap32 running = bitmaps[0];
                        for (int j = 1; j <= k; ++j) {
                            running = running.xor(bitmaps[j]);
                        }
                    }
                }
                long stop = System.currentTimeMillis();
                report.append(elapsedColumn(secondsFormat, start, stop));

                // FastAggregation.xor
                start = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        final EWAHCompressedBitmap32[] head = firstBitmaps(bitmaps, k + 1);
                        final EWAHCompressedBitmap32 combined = FastAggregation.xor(head);
                        bogus += combined.sizeInBits();
                    }
                }
                stop = System.currentTimeMillis();
                report.append(elapsedColumn(secondsFormat, start, stop));

                // FastAggregation32.bufferedxor
                start = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        final EWAHCompressedBitmap32[] head = firstBitmaps(bitmaps, k + 1);
                        final EWAHCompressedBitmap32 combined = FastAggregation32
                                .bufferedxor(65536, head);
                        bogus += combined.sizeInBits();
                    }
                }
                stop = System.currentTimeMillis();
                report.append(elapsedColumn(secondsFormat, start, stop));

                // iterator-based bufferedxor
                start = System.currentTimeMillis();
                for (int r = 0; r < repeat; ++r) {
                    for (int k = 0; k < N; ++k) {
                        final IteratingRLW32[] cursors = new IteratingRLW32[k + 1];
                        for (int j = 0; j <= k; ++j) {
                            cursors[j] = new IteratingBufferedRunningLengthWord32(bitmaps[j]);
                        }
                        final IteratingRLW32 combined = IteratorAggregation32.bufferedxor(cursors);
                        final int wordcounter = IteratorUtil32.cardinality(combined);
                        bogus += wordcounter;
                    }
                }
                stop = System.currentTimeMillis();
                report.append(elapsedColumn(secondsFormat, start, stop));

                System.out
                        .println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based");
                System.out.println(report.toString());
            }
            System.out.println("# bogus =" + bogus);
        }
    }

    /**
     * Compares each aggregation strategy against the pairwise xor of all
     * bitmaps and throws a RuntimeException naming the first mismatch.
     */
    private static void verifyAggregates(EWAHCompressedBitmap32[] bitmaps) {
        EWAHCompressedBitmap32 expected = bitmaps[0].xor(bitmaps[1]);
        for (int k = 2; k < bitmaps.length; ++k) {
            expected = expected.xor(bitmaps[k]);
        }
        final EWAHCompressedBitmap32 aggregated = FastAggregation.xor(bitmaps);
        if (!expected.equals(aggregated)) {
            throw new RuntimeException("bug FastAggregation.xor");
        }
        final EWAHCompressedBitmap32 buffered = FastAggregation32.bufferedxor(65536, bitmaps);
        if (!expected.equals(buffered)) {
            throw new RuntimeException("bug FastAggregation.bufferedxor ");
        }
        final EWAHCompressedBitmap32 viaIterators = IteratorUtil32.materialize(
                IteratorAggregation32.bufferedxor(IteratorUtil32.toIterators(bitmaps)));
        if (!expected.equals(viaIterators)) {
            throw new RuntimeException("bug xor it ");
        }
    }

    /** Copies the first {@code count} bitmaps into a fresh array. */
    private static EWAHCompressedBitmap32[] firstBitmaps(EWAHCompressedBitmap32[] all, int count) {
        final EWAHCompressedBitmap32[] head = new EWAHCompressedBitmap32[count];
        System.arraycopy(all, 0, head, 0, count);
        return head;
    }

    /** Renders the span between two millisecond stamps as a tab-prefixed number of seconds. */
    private static String elapsedColumn(DecimalFormat format, long start, long stop) {
        return "\t" + format.format((stop - start) / 1000.0);
    }
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/ClusteredDataGenerator.java 0000664 0000000 0000000 00000004340 12240435670 0033144 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah.benchmark;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* This class will generate lists of random integers with a "clustered" distribution.
* Reference:
* Anh VN, Moffat A. Index compression using 64-bit words. Software: Practice and Experience 2010; 40(2):131-147.
*
* @author Daniel Lemire
*/
public class ClusteredDataGenerator {
    /** Supplies both the uniform runs and the random source used to pick the cuts. */
    UniformDataGenerator unidg;

    /**
     * Creates a generator backed by an unseeded uniform generator.
     */
    public ClusteredDataGenerator() {
        this.unidg = new UniformDataGenerator();
    }

    /**
     * Creates a generator backed by a seeded uniform generator.
     *
     * @param seed random seed
     */
    public ClusteredDataGenerator(final int seed) {
        this.unidg = new UniformDataGenerator(seed);
    }

    /**
     * Produces N integers from the range starting at 0 and bounded by Max,
     * following a clustered distribution.
     *
     * @param N number of integers
     * @param Max bound of the value range
     * @return a randomly generated array
     */
    public int[] generateClustered(int N, int Max) {
        final int[] result = new int[N];
        fillClustered(result, 0, N, 0, Max);
        return result;
    }

    /**
     * Fills {@code length} slots of {@code array}, starting at
     * {@code offset}, with values taken from the range [Min, Max). Small or
     * fully dense requests are delegated to {@link #fillUniform}; otherwise
     * the range is cut in two and each half is filled either uniformly or
     * recursively, depending on a random draw.
     */
    void fillClustered(int[] array, int offset, int length, int Min, int Max) {
        final int span = Max - Min;
        if (length <= 10 || span == length) {
            fillUniform(array, offset, length, Min, Max);
            return;
        }
        // the cut is drawn before the probability, as the draw order matters
        // for reproducibility with a seeded generator
        final int slack = span - length - 1;
        int cut = length / 2;
        if (slack > 0) {
            cut += this.unidg.rand.nextInt(slack);
        }
        final double p = this.unidg.rand.nextDouble();

        final int leftCount = length / 2;
        final int rightCount = length - leftCount;
        final int rightOffset = offset + leftCount;
        final int pivot = Min + cut;

        // left half: uniform with probability 1/4, clustered otherwise
        if (p < 0.25) {
            fillUniform(array, offset, leftCount, Min, pivot);
        } else {
            fillClustered(array, offset, leftCount, Min, pivot);
        }
        // right half: uniform when p falls in [0.25, 0.5), clustered otherwise
        if (p >= 0.25 && p < 0.5) {
            fillUniform(array, rightOffset, rightCount, pivot, Max);
        } else {
            fillClustered(array, rightOffset, rightCount, pivot, Max);
        }
    }

    /**
     * Fills slots of {@code array}, starting at {@code offset}, with the
     * values returned by the uniform generator for {@code length} integers
     * over a range of size {@code Max - Min}, each shifted by {@code Min}.
     */
    void fillUniform(int[] array, int offset, int length, int Min, int Max) {
        final int[] drawn = this.unidg.generateUniform(length, Max - Min);
        int target = offset;
        for (int value : drawn) {
            array[target++] = Min + value;
        }
    }
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah/benchmark/UniformDataGenerator.java 0000664 0000000 0000000 00000007247 12240435670 0032642 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah.benchmark;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
/**
* This class will generate "uniform" lists of random integers.
*
* @author Daniel Lemire
*/
public class UniformDataGenerator {
    /**
     * construct generator of random arrays.
     */
    public UniformDataGenerator() {
        this.rand = new Random();
    }

    /**
     * @param seed random seed
     */
    public UniformDataGenerator(final int seed) {
        this.rand = new Random(seed);
    }

    /**
     * generates randomly N distinct integers from the range [0,Max),
     * collecting candidates in a hash set until N distinct values are found.
     *
     * @param N Number of integers to generate
     * @param Max exclusive upper bound of the integers
     * @return sorted array containing N distinct random integers
     */
    int[] generateUniformHash(int N, int Max) {
        if (N > Max)
            throw new RuntimeException("not possible");
        int[] ans = new int[N];
        // explicit type arguments: with raw types i.next() is an Object and
        // intValue() does not compile
        HashSet<Integer> s = new HashSet<Integer>();
        while (s.size() < N)
            s.add(Integer.valueOf(this.rand.nextInt(Max)));
        Iterator<Integer> i = s.iterator();
        for (int k = 0; k < N; ++k)
            ans[k] = i.next().intValue();
        Arrays.sort(ans);
        return ans;
    }

    /**
     * output all integers from the range [0,Max) that are not
     * in the array
     *
     * @param x values to exclude; the single forward scan relies on them
     *          being sorted, distinct and inside [0,Max)
     * @param Max exclusive upper bound of the range
     * @return the Max - x.length remaining integers in increasing order
     */
    static int[] negate(int[] x, int Max) {
        int[] ans = new int[Max - x.length];
        int i = 0;
        int c = 0;
        for (int j = 0; j < x.length; ++j) {
            int v = x[j];
            // copy everything strictly below the next excluded value...
            for (; i < v; ++i)
                ans[c++] = i;
            // ...then step over the excluded value itself
            ++i;
        }
        while (c < ans.length)
            ans[c++] = i++;
        return ans;
    }

    /**
     * generates randomly N distinct integers from the range [0,Max).
     *
     * Dense requests (more than half of the range) are answered by drawing
     * the complement and negating it; moderately sparse requests use a
     * bitmap and very sparse ones a hash set.
     *
     * @param N Number of integers to generate
     * @param Max exclusive upper bound of the integers
     * @return sorted array containing N distinct random integers
     * @throws IllegalArgumentException if N is negative or larger than Max
     */
    public int[] generateUniform(int N, int Max) {
        // Without this guard the dense branch recursed with a negative count
        // and failed later with a NegativeArraySizeException.
        if (N < 0 || N > Max)
            throw new IllegalArgumentException("cannot generate " + N
                    + " distinct integers from [0," + Max + ")");
        // The products are computed as long: in int arithmetic 2048 * N
        // wraps around once N reaches 2^20, which silently sent large
        // requests down the slow hash-set path.
        if (2L * N > Max) {
            return negate(generateUniform(Max - N, Max), Max);
        }
        if (2048L * N > Max)
            return generateUniformBitmap(N, Max);
        return generateUniformHash(N, Max);
    }

    /**
     * generates randomly N distinct integers from the range [0,Max) using a bitmap.
     *
     * @param N Number of integers to generate
     * @param Max exclusive upper bound of the integers
     * @return sorted array containing N distinct random integers
     */
    int[] generateUniformBitmap(int N, int Max) {
        if (N > Max)
            throw new RuntimeException("not possible");
        int[] ans = new int[N];
        BitSet bs = new BitSet(Max);
        int cardinality = 0;
        while (cardinality < N) {
            int v = this.rand.nextInt(Max);
            if (!bs.get(v)) {
                bs.set(v);
                cardinality++;
            }
        }
        // walking the set bits yields the values in increasing order
        int pos = 0;
        for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
            ans[pos++] = i;
        }
        return ans;
    }

    /** Random source; assigned by every constructor. */
    Random rand;
}
javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/ 0000775 0000000 0000000 00000000000 12240435670 0024120 5 ustar 00root root 0000000 0000000 javaewah-JavaEWAH-0.7.9/src/main/java/com/googlecode/javaewah32/BitCounter32.java 0000664 0000000 0000000 00000004500 12240435670 0027205 0 ustar 00root root 0000000 0000000 package com.googlecode.javaewah32;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
* Licensed under the Apache License, Version 2.0.
*/
/**
* BitCounter is a fake bitset data structure. Instead of storing the actual data,
* it only records the number of set bits.
*
* @since 0.5.0
* @author Daniel Lemire and David McIntosh
*/
public final class BitCounter32 implements BitmapStorage32 {
/**
* Virtually add words directly to the bitmap
*
* @param newdata the word
*/
// @Override : causes problems with Java 1.5
@Override
public void add(final int newdata) {
this.oneBits += Integer.bitCount(newdata);
}
/**
* virtually add several literal words.
*
* @param data the literal words
* @param start the starting point in the array
* @param number the number of literal words to add
*/
// @Override : causes problems with Java 1.5
@Override
public void addStreamOfLiteralWords(int[] data, int start, int number) {
for(int i=start;i