再看不懂BitMap算法,我请你吃饭(四)

导读:本篇文章讲解 再看不懂BitMap算法,我请你吃饭(四),希望对大家有帮助,欢迎收藏,转发!站点地址:www.bmabk.com

EWAHCompressedBitmap是Google发布的类似BitMap的一个实现,先看看官方的一个例子:

/**
 * https://github.com/lemire/javaewah
 */
@Test
public void test_03() throws IOException {
	EWAHCompressedBitmap ewahBitmap1 = EWAHCompressedBitmap.bitmapOf(0, 2, 55, 64, 1 << 30);
	EWAHCompressedBitmap ewahBitmap2 = EWAHCompressedBitmap.bitmapOf(1, 3, 64, 1 << 30);
	System.out.println("bitmap 1: " + ewahBitmap1);
	System.out.println("bitmap 2: " + ewahBitmap2);

	// or
	EWAHCompressedBitmap orbitmap = ewahBitmap1.or(ewahBitmap2);
	System.out.println("bitmap 1 OR bitmap 2: " + orbitmap);
	System.out.println("memory usage: " + orbitmap.sizeInBytes() + " bytes");

	// and
	EWAHCompressedBitmap andbitmap = ewahBitmap1.and(ewahBitmap2);
	System.out.println("bitmap 1 AND bitmap 2: " + andbitmap);
	System.out.println("memory usage: " + andbitmap.sizeInBytes() + " bytes");

	// xor
	EWAHCompressedBitmap xorbitmap = ewahBitmap1.xor(ewahBitmap2);
	System.out.println("bitmap 1 XOR bitmap 2:" + xorbitmap);
	System.out.println("memory usage: " + xorbitmap.sizeInBytes() + " bytes");

	// fast aggregation over many bitmaps
	EWAHCompressedBitmap ewahBitmap3 = EWAHCompressedBitmap.bitmapOf(5, 55, 1 << 30);
	EWAHCompressedBitmap ewahBitmap4 = EWAHCompressedBitmap.bitmapOf(4, 66, 1 << 30);
	System.out.println("bitmap 3: " + ewahBitmap3);
	System.out.println("bitmap 4: " + ewahBitmap4);

	andbitmap = EWAHCompressedBitmap.and(ewahBitmap1, ewahBitmap2, ewahBitmap3, ewahBitmap4);
	System.out.println("b1 AND b2 AND b3 AND b4: " + andbitmap);

	// serialization
	ByteArrayOutputStream bos = new ByteArrayOutputStream();
	// Note: you could use a file output steam instead of ByteArrayOutputStream
	ewahBitmap1.serialize(new DataOutputStream(bos));
	EWAHCompressedBitmap ewahBitmap1new = new EWAHCompressedBitmap();
	byte[] bout = bos.toByteArray();
	ewahBitmap1new.deserialize(new DataInputStream(new ByteArrayInputStream(bout)));
	System.out.println("bitmap 1 (recovered) : " + ewahBitmap1new);
	if (!ewahBitmap1.equals(ewahBitmap1new)) throw new RuntimeException("Will not happen");
	//
	// we can use a ByteBuffer as backend for a bitmap
	// which allows memory-mapped bitmaps
	//
	ByteBuffer bb = ByteBuffer.wrap(bout);
	EWAHCompressedBitmap rmap = new EWAHCompressedBitmap(bb);
	System.out.println("bitmap 1 (mapped) : " + rmap);

	if (!rmap.equals(ewahBitmap1)) throw new RuntimeException("Will not happen");
	//
	// support for threshold function (new as of version 0.8.0):
	// mark as true a bit that occurs at least T times in the source
	// bitmaps
	//
	EWAHCompressedBitmap threshold2 = EWAHCompressedBitmap.threshold(2,
			ewahBitmap1, ewahBitmap2, ewahBitmap3, ewahBitmap4);
	System.out.println("threshold 2 : " + threshold2);
}

EWAHCompressedBitmap.java中重要的一个属性LongArray.java,测试代码如下,注意包名:

package com.googlecode.javaewah;

import org.junit.Assert;
import org.junit.Test;

public class LongArrayTest {

    @Test
    public void testXxx() {
        /**
         * 未指定大小时,long类型数组的大小默认为4,默认值为0
         */
        LongArray longArray = new LongArray();

        for (int position = 0; position < 4; position++) {
            long word = longArray.getWord(position);
            Assert.assertEquals(0L, word);
        }

        /**
         * 足以证明,long类型数组里的大小确实为4
         */
        try {
            longArray.getWord(4);
            Assert.fail("不应该到这里");
        } catch (Exception e) {
        }

        /**
         * setWord(...)方法不会改变“sizeInWords”属性的值,sizeInWords默认为1
         */
        longArray.setWord(0, 10L);
        int sizeInWords = longArray.sizeInWords();
        Assert.assertEquals(1, sizeInWords);

        longArray.setWord(2, 12L);
        sizeInWords = longArray.sizeInWords();
        Assert.assertEquals(1, sizeInWords);

        /**
         * push_back(...)方法会改变“sizeInWords”属性的值,使其加1
         * push_back前:{10, 0, 12, 0}
         * push_back后:{10, 9527, 12, 0}
         */
        longArray.push_back(9527L);
        sizeInWords = longArray.sizeInWords();
        Assert.assertEquals(2, sizeInWords);

        /**
         * push_back(...)方法会改变“sizeInWords”属性的值,使其加1
         * push_back前:{10, 9527, 12, 0}
         * push_back后:{10, 9527, 9528, 0}
         */
        longArray.push_back(9528L);
        sizeInWords = longArray.sizeInWords();
        Assert.assertEquals(3, sizeInWords);

        /**
         * push_back(...)方法会改变“sizeInWords”属性的值,使其加1
         * push_back前:{10, 9527, 9528, 0}
         * push_back后:{10, 9527, 9528, 9529, 0, 0, 0, 0}
         */
        longArray.push_back(9529L);
        sizeInWords = longArray.sizeInWords();
        Assert.assertEquals(4, sizeInWords);

        /**
         * push_back(...)方法会改变“sizeInWords”属性的值,使其加1
         * push_back前:{10, 9527, 9528, 9529, 0, 0, 0, 0}
         * push_back后:{10, 9527, 9528, 9529, 9530, 0, 0, 0}
         */
        longArray.push_back(9530L);
        sizeInWords = longArray.sizeInWords();
        Assert.assertEquals(5, sizeInWords);

        /**
         * push_back(...)方法会改变“sizeInWords”属性的值,使其加1
         * push_back前:{10, 9527, 9528, 9529, 9530, 0, 0, 0}
         * push_back后:{10, 9527, 9528, 9529, 9530, 9531, 0, 0}
         */
        longArray.push_back(9531L);
        sizeInWords = longArray.sizeInWords();
        Assert.assertEquals(6, sizeInWords);

        /**
         * push_back(...)方法会改变“sizeInWords”属性的值,使其加1
         * push_back前:{10, 9527, 9528, 9529, 9530, 9531, 0, 0}
         * push_back后:{10, 9527, 9528, 9529, 9530, 9531, 9532, 0}
         */
        longArray.push_back(9532L);
        sizeInWords = longArray.sizeInWords();
        Assert.assertEquals(7, sizeInWords);

        /**
         * push_back(...)方法会改变“sizeInWords”属性的值,使其加1
         * push_back前:{10, 9527, 9528, 9529, 9530, 9531, 9532, 0}
         * push_back后:{10, 9527, 9528, 9529, 9530, 9531, 9532, 9533, 0, 0, 0, 0, 0, 0, 0, 0}
         */
        longArray.push_back(9533L);
        sizeInWords = longArray.sizeInWords();
        Assert.assertEquals(8, sizeInWords);

        /**
         * push_back(...)方法会改变“sizeInWords”属性的值,使其加1
         * push_back前:{10, 9527, 9528, 9529, 9530, 9531, 9532, 9533, 0, 0, 0, 0, 0, 0, 0, 0}
         * push_back后:{10, 9527, 9528, 9529, 9530, 9531, 9532, 9533, 9999999999, 0, 0, 0, 0, 0, 0, 0}
         */
        longArray.push_back(9999999999L);
        sizeInWords = longArray.sizeInWords();
        Assert.assertEquals(9, sizeInWords);
    }

}

官方源码中的一个单元测试方法:

@Test
public void testEWAHCompressedBitmap() {
    System.out.println("testing EWAH");
    long zero = 0;
    long specialval = 1l | (1l << 4) | (1l << 63);
    long notzero = ~zero;
    EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap();
    myarray1.addWord(zero);
    myarray1.addWord(zero);
    myarray1.addWord(zero);
    myarray1.addWord(specialval);
    myarray1.addWord(specialval);
    myarray1.addWord(notzero);
    myarray1.addWord(zero);
    Assert.assertEquals(myarray1.toList().size(), 6 + 64);
    EWAHCompressedBitmap myarray2 = new EWAHCompressedBitmap();
    myarray2.addWord(zero);
    myarray2.addWord(specialval);
    myarray2.addWord(specialval);
    myarray2.addWord(notzero);
    myarray2.addWord(zero);
    myarray2.addWord(zero);
    myarray2.addWord(zero);
    Assert.assertEquals(myarray2.toList().size(), 6 + 64);
    List<Integer> data1 = myarray1.toList();
    List<Integer> data2 = myarray2.toList();
    Vector<Integer> logicalor = new Vector<Integer>();
    {
        HashSet<Integer> tmp = new HashSet<Integer>();
        tmp.addAll(data1);
        tmp.addAll(data2);
        logicalor.addAll(tmp);
    }
    Collections.sort(logicalor);
    Vector<Integer> logicaland = new Vector<Integer>();
    logicaland.addAll(data1);
    logicaland.retainAll(data2);
    Collections.sort(logicaland);
    EWAHCompressedBitmap arrayand = myarray1.and(myarray2);
    Assert.assertTrue(arrayand.toList().equals(logicaland));
    EWAHCompressedBitmap arrayor = myarray1.or(myarray2);
    Assert.assertTrue(arrayor.toList().equals(logicalor));
    EWAHCompressedBitmap arrayandbis = myarray2.and(myarray1);
    Assert.assertTrue(arrayandbis.toList().equals(logicaland));
    EWAHCompressedBitmap arrayorbis = myarray2.or(myarray1);
    Assert.assertTrue(arrayorbis.toList().equals(logicalor));
    EWAHCompressedBitmap x = new EWAHCompressedBitmap();
    for (Integer i : myarray1.toList()) {
        x.set(i);
    }
    Assert.assertTrue(x.toList().equals(
            myarray1.toList()));
    x = new EWAHCompressedBitmap();
    for (Integer i : myarray2.toList()) {
        x.set(i);
    }
    Assert.assertTrue(x.toList().equals(
            myarray2.toList()));
    x = new EWAHCompressedBitmap();
    for (Iterator<Integer> k = myarray1.iterator(); k.hasNext(); ) {
        x.set(extracted(k));
    }
    Assert.assertTrue(x.toList().equals(
            myarray1.toList()));
    x = new EWAHCompressedBitmap();
    for (Iterator<Integer> k = myarray2.iterator(); k.hasNext(); ) {
        x.set(extracted(k));
    }
    Assert.assertTrue(x.toList().equals(
            myarray2.toList()));
}

EWAHCompressedBitmap.bitmapOf(…)

静态工厂方法,很好理解。

and(…)、or(…)、xor(…)

看看这篇里说的,想统计出“00后、且是程序员”的总用户数,就是可以使用and(…)方法来实现。

sizeInBytes()

TODO

threshold(…)

TODO

API对比

java.util.BitSet 自定义的BitMapV1 EWAHCompressedBitmap
BitSet.set(…) BitMapV1.add(…) addWord(…)
BitSet.get(…) BitMapV1.isExist(…) get(…)
BitSet.clear(…) BitMapV1.clear(…) clear(…)

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。

文章由极客之家整理,本文链接:https://www.bmabk.com/index.php/post/17224.html

(0)
小半的头像小半

相关推荐

极客之家——专业性很强的中文编程技术网站,欢迎收藏到浏览器,订阅我们!