From 2b13c2e8e9fd465a88e73d0342ac9851f5e0d4ae Mon Sep 17 00:00:00 2001
From: Navaneeth Suresh <navaneeths1998@gmail.com>
Date: Wed, 1 Jul 2020 20:23:17 +0530
Subject: [PATCH 1/2] BLD: add ewah-bool-utils as a buildtime and runtime
 dependency

Co-authored-by: Navaneeth Suresh <navaneeths1998@gmail.com>
---
 .gitignore                                    |    1 -
 pyproject.toml                                |    3 +
 setup.py                                      |    3 +-
 setupext.py                                   |    8 +
 yt/geometry/particle_geometry_handler.py      |    2 +-
 yt/geometry/particle_oct_container.pyx        |   27 +-
 yt/utilities/lib/ewah_bool_array.pxd          |  103 -
 yt/utilities/lib/ewah_bool_wrap.pxd           |  163 --
 yt/utilities/lib/ewah_bool_wrap.pyx           | 1769 -----------------
 yt/utilities/lib/ewahboolarray/LICENSE        |  191 --
 yt/utilities/lib/ewahboolarray/README         |    8 -
 yt/utilities/lib/ewahboolarray/boolarray.h    |  488 -----
 yt/utilities/lib/ewahboolarray/ewah-inl.h     | 1670 ----------------
 yt/utilities/lib/ewahboolarray/ewah.h         |  712 -------
 yt/utilities/lib/ewahboolarray/ewahutil.h     |  233 ---
 .../lib/ewahboolarray/runninglengthword.h     |  551 -----
 16 files changed, 28 insertions(+), 5904 deletions(-)
 delete mode 100644 yt/utilities/lib/ewah_bool_array.pxd
 delete mode 100644 yt/utilities/lib/ewah_bool_wrap.pxd
 delete mode 100644 yt/utilities/lib/ewah_bool_wrap.pyx
 delete mode 100644 yt/utilities/lib/ewahboolarray/LICENSE
 delete mode 100644 yt/utilities/lib/ewahboolarray/README
 delete mode 100644 yt/utilities/lib/ewahboolarray/boolarray.h
 delete mode 100644 yt/utilities/lib/ewahboolarray/ewah-inl.h
 delete mode 100644 yt/utilities/lib/ewahboolarray/ewah.h
 delete mode 100644 yt/utilities/lib/ewahboolarray/ewahutil.h
 delete mode 100644 yt/utilities/lib/ewahboolarray/runninglengthword.h

diff --git a/.gitignore b/.gitignore
index b251fe0d797..d148f64f694 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,7 +45,6 @@ yt/utilities/lib/cyoctree.c
 yt/utilities/lib/depth_first_octree.c
 yt/utilities/lib/distance_queue.c
 yt/utilities/lib/element_mappings.c
-yt/utilities/lib/ewah_bool_wrap.cpp
 yt/utilities/lib/fnv_hash.c
 yt/utilities/lib/fortran_reader.c
 yt/utilities/lib/freetype_writer.c
diff --git a/pyproject.toml b/pyproject.toml
index a2bad16ec2e..c99ffc2dd47 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,7 @@ requires = [
   # https://github.com/yt-project/yt/issues/4355
   "Cython>=0.29.33,<3.0",
   "oldest-supported-numpy",
+  "ewah-bool-utils>=1.0.2",
 ]
 
 [project]
@@ -45,6 +46,7 @@ keywords = [
 requires-python = ">=3.8"
 dependencies = [
     "cmyt>=1.1.2",
+    "ewah-bool-utils>=1.0.2",
     "ipywidgets>=8.0.0",
     "matplotlib!=3.4.2,>=3.2", # keep in sync with tests/windows_conda_requirements.txt
     "more-itertools>=8.4",
@@ -204,6 +206,7 @@ mapserver = [
 ]
 minimal = [
     "cmyt==1.1.2",
+    "ewah-bool-utils==1.0.2",
     "ipywidgets==8.0.0",
     "matplotlib==3.2",
     "more-itertools==8.4",
diff --git a/setup.py b/setup.py
index 8ec14a8396f..7e040b76a79 100644
--- a/setup.py
+++ b/setup.py
@@ -10,6 +10,7 @@
     check_for_openmp,
     check_for_pyembree,
     create_build_ext,
+    get_ewah_bool_utils_path,
     install_ccompiler,
 )
 
@@ -41,7 +42,6 @@
 
 cythonize_aliases = {
     "LIB_DIR": "yt/utilities/lib/",
-    "LIB_DIR_EWAH": ["yt/utilities/lib/", "yt/utilities/lib/ewahboolarray/"],
     "LIB_DIR_GEOM": ["yt/utilities/lib/", "yt/geometry/"],
     "LIB_DIR_GEOM_ARTIO": [
         "yt/utilities/lib/",
@@ -49,6 +49,7 @@
         "yt/frontends/artio/artio_headers/",
     ],
     "STD_LIBS": std_libs,
+    "EWAH_LIBS": std_libs + [get_ewah_bool_utils_path()],
     "OMP_ARGS": omp_args,
     "FIXED_INTERP": "yt/utilities/lib/fixed_interpolator.cpp",
     "ARTIO_SOURCE": glob.glob("yt/frontends/artio/artio_headers/*.c"),
diff --git a/setupext.py b/setupext.py
index a6143e4657b..c4085976818 100644
--- a/setupext.py
+++ b/setupext.py
@@ -13,6 +13,7 @@
 from distutils.sysconfig import customize_compiler
 from subprocess import PIPE, Popen
 from sys import platform as _platform
+import ewah_bool_utils
 from setuptools.command.build_ext import build_ext as _build_ext
 from setuptools.command.sdist import sdist as _sdist
 from setuptools.errors import CompileError, LinkError
@@ -203,6 +204,12 @@ def check_CPP14_flags(possible_compile_flags):
     )
     return []
 
+def get_ewah_bool_utils_path():
+    """Return the absolute directory of the installed ewah_bool_utils package."""
+    # The module-level ``import ewah_bool_utils`` makes ``__file__`` available on
+    # every supported Python, so neither importlib.resources (3.9+ only) nor the
+    # deprecated pkg_resources fallback is needed to locate the package.
+    return os.path.dirname(os.path.abspath(ewah_bool_utils.__file__))
 
 def check_for_pyembree(std_libs):
     embree_libs = []
@@ -400,6 +407,7 @@ def finalize_options(self):
             import numpy
 
             self.include_dirs.append(numpy.get_include())
+            self.include_dirs.append(ewah_bool_utils.get_include())
 
         def build_extensions(self):
             self.check_extensions_list(self.extensions)
diff --git a/yt/geometry/particle_geometry_handler.py b/yt/geometry/particle_geometry_handler.py
index bf9ad9eabc7..42be7dacf4a 100644
--- a/yt/geometry/particle_geometry_handler.py
+++ b/yt/geometry/particle_geometry_handler.py
@@ -5,12 +5,12 @@
 import weakref
 
 import numpy as np
+from ewah_bool_utils.ewah_bool_wrap import BoolArrayCollection
 
 from yt.data_objects.index_subobjects.particle_container import ParticleContainer
 from yt.funcs import get_pbar, only_on_root
 from yt.geometry.geometry_handler import Index, YTDataChunk
 from yt.geometry.particle_oct_container import ParticleBitmap
-from yt.utilities.lib.ewah_bool_wrap import BoolArrayCollection
 from yt.utilities.lib.fnv_hash import fnv_hash
 from yt.utilities.logger import ytLogger as mylog
 from yt.utilities.parallel_tools.parallel_analysis_interface import parallel_objects
diff --git a/yt/geometry/particle_oct_container.pyx b/yt/geometry/particle_oct_container.pyx
index df9d1f74e9b..dc114860574 100644
--- a/yt/geometry/particle_oct_container.pyx
+++ b/yt/geometry/particle_oct_container.pyx
@@ -1,7 +1,7 @@
-# distutils: include_dirs = LIB_DIR_EWAH
 # distutils: language = c++
 # distutils: extra_compile_args = CPP14_FLAG
-# distutils: libraries = STD_LIBS
+# distutils: include_dirs = LIB_DIR
+# distutils: libraries = EWAH_LIBS
 """
 Oct container tuned for Particles
 
@@ -10,17 +10,16 @@ Oct container tuned for Particles
 """
 
 
-from libc.math cimport ceil, log2
-from libc.stdlib cimport free, malloc
-from libcpp.map cimport map as cmap
-from libcpp.vector cimport vector
-
-from yt.utilities.lib.ewah_bool_array cimport (
+from ewah_bool_utils.ewah_bool_array cimport (
     bool_array,
     ewah_bool_array,
     ewah_bool_iterator,
     ewah_word_type,
 )
+from libc.math cimport ceil, log2
+from libc.stdlib cimport free, malloc
+from libcpp.map cimport map as cmap
+from libcpp.vector cimport vector
 
 import numpy as np
 
@@ -55,19 +54,21 @@ from .selection_routines cimport AlwaysSelector, SelectorObject
 
 from yt.funcs import get_pbar
 
-from ..utilities.lib.ewah_bool_wrap cimport BoolArrayCollection
+from ewah_bool_utils.ewah_bool_wrap cimport BoolArrayCollection
 
 import os
 
-
-_bitmask_version = np.uint64(5)
-
-from ..utilities.lib.ewah_bool_wrap cimport (
+from ewah_bool_utils.ewah_bool_wrap cimport (
     BoolArrayCollectionUncompressed as BoolArrayColl,
     FileBitmasks,
     SparseUnorderedRefinedBitmaskSet as SparseUnorderedRefinedBitmask,
 )
 
+
+_bitmask_version = np.uint64(5)
+
+
+
 ctypedef cmap[np.uint64_t, bool_array] CoarseRefinedSets
 
 cdef class ParticleOctreeContainer(OctreeContainer):
diff --git a/yt/utilities/lib/ewah_bool_array.pxd b/yt/utilities/lib/ewah_bool_array.pxd
deleted file mode 100644
index be745cae049..00000000000
--- a/yt/utilities/lib/ewah_bool_array.pxd
+++ /dev/null
@@ -1,103 +0,0 @@
-"""
-Wrapper for EWAH Bool Array: https://github.com/lemire/EWAHBoolArray
-
-
-
-"""
-
-
-from libc.stdint cimport uint32_t, uint64_t
-from libcpp cimport bool
-from libcpp.map cimport map as cmap
-from libcpp.string cimport string
-from libcpp.vector cimport vector
-
-
-# Streams req for c++ IO
-cdef extern from "<ostream>" namespace "std":
-    cdef cppclass ostream[T]:
-        pass
-cdef extern from "<istream>" namespace "std":
-    cdef cppclass istream[T]:
-        pass
-
-cdef extern from "<sstream>" namespace "std":
-    cdef cppclass stringstream:
-        stringstream() except +
-        string str()
-        ostream write(char *, size_t)
-        istream read(char *, size_t)
-        bint eof()
-
-cdef extern from "ewah.h" namespace "ewah":
-    cppclass EWAHBoolArraySetBitForwardIterator[uword]:
-        # EWAHBoolArraySetBitForwardIterator()
-        EWAHBoolArraySetBitForwardIterator(const EWAHBoolArraySetBitForwardIterator &o)
-        size_t operator*()
-        EWAHBoolArraySetBitForwardIterator &operator++()
-        bint operator==(EWAHBoolArraySetBitForwardIterator &x)
-        bint operator!=(EWAHBoolArraySetBitForwardIterator &x)
-    # ctypedef EWAHBoolArraySetBitForwardIterator[unsigned long long] const_iterator
-    cdef cppclass EWAHBoolArray[uword]:
-        # We are going to skip the varargs here; it is too tricky to assemble.
-        bint get(const size_t pos)
-        bint set(size_t i)
-        void makeSameSize(EWAHBoolArray &a)
-        vector[size_t] toArray()
-        void logicaland(EWAHBoolArray &a, EWAHBoolArray &container)
-        void logicalor(EWAHBoolArray &a, EWAHBoolArray &container)
-        void logicalxor(EWAHBoolArray &a, EWAHBoolArray &container)
-        bint intersects(EWAHBoolArray &a)
-        void reset()
-        size_t sizeInBits()
-        size_t sizeInBytes()
-        bint operator==(EWAHBoolArray &x)
-        bint operator!=(EWAHBoolArray &x)
-        void append(EWAHBoolArray &x)
-        # Recommended container is "vector[size_t]"
-        void appendRowIDs[container](container &out, const size_t offset)
-        void appendSetBits[container](container &out, const size_t offset)
-        size_t numberOfOnes()
-        void logicalnot(EWAHBoolArray &x)
-        void inplace_logicalnot()
-        void swap(EWAHBoolArray &x)
-        void read(stringstream &incoming, bint savesizeinbits)
-        void readBuffer(stringstream &incoming, const size_t buffersize)
-        void write(stringstream &out, bint savesizeinbits)
-        void writeBuffer(stringstream &out)
-        size_t addWord(uword newdata)
-        vector[uword] &getBuffer()
-        # const_iterator begin()
-        # const_iterator end()
-        EWAHBoolArraySetBitForwardIterator begin()
-        EWAHBoolArraySetBitForwardIterator end()
-
-cdef extern from "boolarray.h" namespace "ewah":
-    cppclass BoolArray[uword]:
-        void setSizeInBits(size_t sizeib)
-        void set(size_t pos)
-        void unset(size_t pos)
-        bool get(size_t pos)
-        void reset()
-        size_t sizeInBits()
-        size_t sizeInBytes()
-        size_t numberOfOnes()
-        void inplace_logicalxor(BoolArray &other)
-        void inplace_logicalnot()
-        size_t padWithZeroes(size_t totalbits)
-        uword getWord(size_t pos)
-        size_t wordinbits
-
-cimport cython
-cimport numpy as np
-
-IF UNAME_SYSNAME == "Windows":
-    ctypedef uint32_t ewah_word_type
-ELSE:
-    ctypedef np.uint32_t ewah_word_type
-ctypedef EWAHBoolArray[ewah_word_type] ewah_bool_array
-ctypedef EWAHBoolArraySetBitForwardIterator[ewah_word_type] ewah_bool_iterator
-ctypedef vector[size_t] bitset_array
-ctypedef cmap[np.uint64_t, ewah_bool_array] ewah_map
-ctypedef stringstream sstream
-ctypedef BoolArray[ewah_word_type] bool_array
diff --git a/yt/utilities/lib/ewah_bool_wrap.pxd b/yt/utilities/lib/ewah_bool_wrap.pxd
deleted file mode 100644
index cdb015d5518..00000000000
--- a/yt/utilities/lib/ewah_bool_wrap.pxd
+++ /dev/null
@@ -1,163 +0,0 @@
-cimport numpy as np
-from libcpp.pair cimport pair
-from libcpp.set cimport set as cset
-from libcpp.vector cimport vector
-
-from yt.utilities.lib.ewah_bool_array cimport (
-    ewah_bool_array,
-    ewah_bool_iterator,
-    ewah_map,
-    sstream,
-)
-
-ctypedef bint bitarrtype
-ctypedef pair[np.uint64_t, np.uint64_t] ind_pair
-
-cdef class FileBitmasks:
-    cdef np.uint32_t nfiles
-    cdef ewah_map** ewah_coll
-    cdef ewah_bool_array** ewah_keys
-    cdef ewah_bool_array** ewah_refn
-
-    cdef void _reset(self)
-    cdef bint _iseq(self, FileBitmasks solf)
-    cdef BoolArrayCollection _get_bitmask(self, np.uint32_t ifile)
-    cdef tuple _find_collisions(self, BoolArrayCollection coll, bint verbose=*)
-    cdef tuple _find_collisions_coarse(self, BoolArrayCollection coll, bint
-                verbose=*, file_list=*)
-    cdef tuple _find_collisions_refined(self, BoolArrayCollection coll, bint verbose=*)
-    cdef void _set(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2=*)
-    cdef void _set_coarse(self, np.uint32_t ifile, np.uint64_t i1)
-    cdef void _set_refined(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2)
-    cdef void _set_coarse_array(self, np.uint32_t ifile, np.uint8_t[:] arr)
-    cdef void _set_refined_array(self, np.uint32_t ifile, np.uint64_t mi1, np.uint8_t[:] arr)
-    cdef void _set_refined_index_array(self, np.uint32_t ifile, np.int64_t nsub_mi,
-                                       np.ndarray[np.uint64_t, ndim=1] sub_mi1,
-                                       np.ndarray[np.uint64_t, ndim=1] sub_mi2)
-    cdef void _set_map(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2)
-    cdef void _set_refn(self, np.uint32_t ifile, np.uint64_t i1)
-    cdef bint _get(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2=*)
-    cdef bint _get_coarse(self, np.uint32_t ifile, np.uint64_t i1)
-    cdef void _get_coarse_array(self, np.uint32_t ifile, np.uint64_t imax, np.uint8_t[:] arr) except *
-    cdef bint _isref(self, np.uint32_t ifile, np.uint64_t i)
-    cdef np.uint64_t _count_total(self, np.uint32_t ifile)
-    cdef np.uint64_t _count_refined(self, np.uint32_t ifile)
-    cdef np.uint64_t _count_coarse(self, np.uint32_t ifile)
-    cdef void _append(self, np.uint32_t ifile, BoolArrayCollection solf)
-    cdef bint _intersects(self, np.uint32_t ifile, BoolArrayCollection solf)
-    cdef void _logicalxor(self, np.uint32_t ifile, BoolArrayCollection solf, BoolArrayCollection out)
-    cdef void _logicaland(self, np.uint32_t ifile, BoolArrayCollection solf, BoolArrayCollection out)
-    cdef void _select_contaminated(self, np.uint32_t ifile, BoolArrayCollection mask, np.uint8_t[:] out,
-               np.uint8_t[:] secondary_files, BoolArrayCollection mask2=*)
-    cdef void _select_uncontaminated(self, np.uint32_t ifile, BoolArrayCollection mask, np.uint8_t[:] out,
-               BoolArrayCollection mask2=*)
-    cdef bytes _dumps(self, np.uint32_t ifile)
-    cdef bint _loads(self, np.uint32_t ifile, bytes s)
-    cdef bint _check(self)
-
-cdef class BoolArrayCollection:
-    cdef ewah_map* ewah_coll
-    cdef ewah_bool_array* ewah_keys
-    cdef ewah_bool_array* ewah_refn
-    cdef ewah_bool_array* ewah_coar
-
-    cdef void _reset(self)
-    cdef int _richcmp(self, BoolArrayCollection solf, int op) except -1
-    cdef void _set(self, np.uint64_t i1, np.uint64_t i2=*)
-    cdef void _set_coarse(self, np.uint64_t i1)
-    cdef void _set_refined(self, np.uint64_t i1, np.uint64_t i2)
-    cdef void _set_coarse_array(self, np.uint8_t[:] arr)
-    cdef void _set_refined_array(self, np.uint64_t mi1, np.uint8_t[:] arr)
-    cdef void _set_map(self, np.uint64_t i1, np.uint64_t i2)
-    cdef void _set_refn(self, np.uint64_t i1)
-    cdef bint _get(self, np.uint64_t i1, np.uint64_t i2=*)
-    cdef bint _get_coarse(self, np.uint64_t i1)
-    cdef void _get_coarse_array(self, np.uint64_t imax, np.uint8_t[:] arr) except *
-    cdef bint _contains(self, np.uint64_t i)
-    cdef bint _isref(self, np.uint64_t i)
-    cdef void _ewah_coarse(self)
-    cdef np.uint64_t _count_total(self)
-    cdef np.uint64_t _count_refined(self)
-    cdef np.uint64_t _count_coarse(self)
-    cdef void _append(self, BoolArrayCollection solf)
-    cdef void _logicalor(self, BoolArrayCollection solf, BoolArrayCollection out)
-    cdef bint _intersects(self, BoolArrayCollection solf)
-    cdef void _logicalxor(self, BoolArrayCollection solf, BoolArrayCollection out)
-    cdef void _logicaland(self, BoolArrayCollection solf, BoolArrayCollection out)
-    cdef void _select_contaminated(self, BoolArrayCollection mask, np.uint8_t[:] out,
-        BoolArrayCollection mask2=*)
-    cdef void _select_uncontaminated(self, BoolArrayCollection mask, np.uint8_t[:] out,
-        BoolArrayCollection mask2=*)
-    cdef void _get_ghost_zones(self, int ngz, int order1, int order2,
-                               bint periodicity[3], BoolArrayCollection out_ewah,
-                               bint coarse_ghosts=*)
-    cdef bytes _dumps(self)
-    cdef bint _loads(self, bytes s)
-    cdef bint _check(self)
-
-cdef class BoolArrayCollectionUncompressed:
-    cdef int nele1
-    cdef int nele2
-    cdef ewah_map* ewah_coll
-    cdef bitarrtype* ewah_keys
-    cdef bitarrtype* ewah_refn
-
-    cdef void _set(self, np.uint64_t i1, np.uint64_t i2=*)
-    cdef void _set_coarse(self, np.uint64_t i1)
-    cdef void _set_refined(self, np.uint64_t i1, np.uint64_t i2)
-    cdef void _set_coarse_array(self, np.uint8_t[:] arr)
-    cdef void _set_coarse_array_ptr(self, np.uint8_t *arr)
-    cdef void _set_refined_array(self, np.uint64_t mi1, np.uint8_t[:] arr)
-    cdef void _set_refined_array_ptr(self, np.uint64_t mi1, np.uint8_t *arr)
-    cdef void _set_map(self, np.uint64_t i1, np.uint64_t i2)
-    cdef void _set_refn(self, np.uint64_t i1)
-    cdef bint _get(self, np.uint64_t i1, np.uint64_t i2=*)
-    cdef bint _get_coarse(self, np.uint64_t i1)
-    cdef bint _isref(self, np.uint64_t i)
-    cdef np.uint64_t _count_total(self)
-    cdef np.uint64_t _count_refined(self)
-    cdef void _append(self, BoolArrayCollectionUncompressed solf)
-    cdef bint _intersects(self, BoolArrayCollectionUncompressed solf)
-    cdef void _compress(self, BoolArrayCollection solf)
-
-cdef class SparseUnorderedBitmaskSet:
-    cdef cset[np.uint64_t] entries
-    cdef void _set(self, np.uint64_t ind)
-    cdef void _fill(self, np.uint8_t[:] mask)
-    cdef void _fill_ewah(self, BoolArrayCollection mm)
-    cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm)
-    cdef void _reset(self)
-    cdef to_array(self)
-
-cdef class SparseUnorderedBitmaskVector:
-    cdef int total
-    cdef vector[np.uint64_t] entries
-    cdef void _set(self, np.uint64_t ind)
-    cdef void _fill(self, np.uint8_t[:] mask)
-    cdef void _fill_ewah(self, BoolArrayCollection mm)
-    cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm)
-    cdef void _reset(self)
-    cdef to_array(self)
-    cdef void _remove_duplicates(self)
-    cdef void _prune(self)
-
-cdef class SparseUnorderedRefinedBitmaskSet:
-    cdef cset[ind_pair] entries
-    cdef void _set(self, np.uint64_t ind1, np.uint64_t ind2)
-    cdef void _fill(self, np.uint8_t[:] mask1, np.uint8_t[:])
-    cdef void _fill_ewah(self, BoolArrayCollection mm)
-    cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm)
-    cdef void _reset(self)
-    cdef to_array(self)
-
-cdef class SparseUnorderedRefinedBitmaskVector:
-    cdef int total
-    cdef vector[ind_pair] entries
-    cdef void _set(self, np.uint64_t ind1, np.uint64_t ind2)
-    cdef void _fill(self, np.uint8_t[:] mask1, np.uint8_t[:])
-    cdef void _fill_ewah(self, BoolArrayCollection mm)
-    cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm)
-    cdef void _reset(self)
-    cdef to_array(self)
-    cdef void _remove_duplicates(self)
-    cdef void _prune(self)
diff --git a/yt/utilities/lib/ewah_bool_wrap.pyx b/yt/utilities/lib/ewah_bool_wrap.pyx
deleted file mode 100644
index 8b54dad35e0..00000000000
--- a/yt/utilities/lib/ewah_bool_wrap.pyx
+++ /dev/null
@@ -1,1769 +0,0 @@
-# distutils: language = c++
-# distutils: include_dirs = LIB_DIR_EWAH
-# distutils: extra_compile_args = CPP14_FLAG
-"""
-Wrapper for EWAH Bool Array: https://github.com/lemire/EWAHBoolArray
-
-
-
-"""
-
-
-import struct
-
-from cython.operator cimport dereference, preincrement
-from libc.stdlib cimport free, malloc
-from libcpp.algorithm cimport sort
-from libcpp.map cimport map as cmap
-
-import numpy as np
-
-cimport cython
-cimport numpy as np
-
-from yt.utilities.lib.geometry_utils cimport (
-    morton_neighbors_coarse,
-    morton_neighbors_refined,
-)
-
-
-cdef extern from "<algorithm>" namespace "std" nogil:
-    Iter unique[Iter](Iter first, Iter last)
-
-cdef np.uint64_t FLAG = ~(<np.uint64_t>0)
-cdef np.uint64_t MAX_VECTOR_SIZE = <np.uint64_t>1e7
-
-ctypedef cmap[np.uint64_t, ewah_bool_array] ewahmap
-ctypedef cmap[np.uint64_t, ewah_bool_array].iterator ewahmap_it
-ctypedef pair[np.uint64_t, ewah_bool_array] ewahmap_p
-
-cdef class FileBitmasks:
-
-    def __cinit__(self, np.uint32_t nfiles):
-        cdef int i
-        self.nfiles = nfiles
-        self.ewah_keys = <ewah_bool_array **>malloc(nfiles*sizeof(ewah_bool_array*))
-        self.ewah_refn = <ewah_bool_array **>malloc(nfiles*sizeof(ewah_bool_array*))
-        self.ewah_coll = <ewah_map **>malloc(nfiles*sizeof(ewah_map*))
-        for i in range(nfiles):
-            self.ewah_keys[i] = new ewah_bool_array()
-            self.ewah_refn[i] = new ewah_bool_array()
-            self.ewah_coll[i] = new ewah_map()
-
-    cdef void _reset(self):
-        cdef np.int32_t ifile
-        for ifile in range(self.nfiles):
-            self.ewah_keys[ifile].reset()
-            self.ewah_refn[ifile].reset()
-            self.ewah_coll[ifile].clear()
-
-    cdef bint _iseq(self, FileBitmasks solf):
-        cdef np.int32_t ifile
-        cdef ewah_bool_array* arr1
-        cdef ewah_bool_array* arr2
-        cdef ewahmap *map1
-        cdef ewahmap *map2
-        cdef ewahmap_p pair1, pair2
-        cdef ewahmap_it it_map1, it_map2
-        if self.nfiles != solf.nfiles:
-            return 0
-        for ifile in range(self.nfiles):
-            # Keys
-            arr1 = (<ewah_bool_array **> self.ewah_keys)[ifile]
-            arr2 = (<ewah_bool_array **> solf.ewah_keys)[ifile]
-            if arr1[0] != arr2[0]:
-                return 0
-            # Refn
-            arr1 = (<ewah_bool_array **> self.ewah_refn)[ifile]
-            arr2 = (<ewah_bool_array **> solf.ewah_refn)[ifile]
-            if arr1[0] != arr2[0]:
-                return 0
-            # Map
-            map1 = (<ewahmap **> self.ewah_coll)[ifile]
-            map2 = (<ewahmap **> solf.ewah_coll)[ifile]
-            for pair1 in map1[0]:
-                it_map2 = map2[0].find(pair1.first)
-                if it_map2 == map2[0].end():
-                    return 0
-                if pair1.second != dereference(it_map2).second:
-                    return 0
-            for pair2 in map2[0]:
-                it_map1 = map1[0].find(pair2.first)
-                if it_map1 == map1[0].end():
-                    return 0
-                if pair2.second != dereference(it_map1).second:
-                    return 0
-            # Match
-            return 1
-
-    def iseq(self, solf):
-        return self._iseq(solf)
-
-    cdef BoolArrayCollection _get_bitmask(self, np.uint32_t ifile):
-        cdef BoolArrayCollection out = BoolArrayCollection()
-        cdef ewah_bool_array **ewah_keys = <ewah_bool_array **>self.ewah_keys
-        cdef ewah_bool_array **ewah_refn = <ewah_bool_array **>self.ewah_refn
-        cdef ewah_map **ewah_coll = <ewah_map **>self.ewah_coll
-        # This version actually copies arrays, which can be costly
-        cdef ewah_bool_array *ewah_keys_out = <ewah_bool_array *>out.ewah_keys
-        cdef ewah_bool_array *ewah_refn_out = <ewah_bool_array *>out.ewah_refn
-        cdef ewah_map *ewah_coll_out = <ewah_map *>out.ewah_coll
-        ewah_keys_out[0] = ewah_keys[ifile][0]
-        ewah_refn_out[0] = ewah_refn[ifile][0]
-        ewah_coll_out[0] = ewah_coll[ifile][0]
-        # This version only copies pointers which can lead to deallocation of
-        # the source when the copy is deleted.
-        # out.ewah_keys = <void *>ewah_keys[ifile]
-        # out.ewah_refn = <void *>ewah_refn[ifile]
-        # out.ewah_coll = <void *>ewah_coll[ifile]
-        return out
-
-    cdef tuple _find_collisions(self, BoolArrayCollection coll, bint verbose = 0):
-        cdef tuple cc, cr
-        cc = self._find_collisions_coarse(coll, verbose)
-        cr = self._find_collisions_refined(coll, verbose)
-        return cc, cr
-
-    cdef tuple _find_collisions_coarse(self, BoolArrayCollection coll, bint
-                        verbose = 0, file_list = None):
-        cdef np.int32_t ifile
-        cdef ewah_bool_array arr_two, arr_swap, arr_keys, arr_refn
-        cdef ewah_bool_array* iarr
-        cdef ewah_bool_array* coll_keys
-        cdef ewah_bool_array* coll_refn
-        coll_keys = (<ewah_bool_array*> coll.ewah_keys)
-        coll_refn = (<ewah_bool_array*> coll.ewah_refn)
-        if file_list is None:
-            file_list = range(self.nfiles)
-        for ifile in file_list:
-            iarr = (<ewah_bool_array **>self.ewah_keys)[ifile]
-            arr_keys.logicaland(iarr[0], arr_two)
-            arr_keys.logicalor(iarr[0], arr_swap)
-            arr_keys.swap(arr_swap)
-            arr_refn.logicalor(arr_two, arr_swap)
-            arr_refn.swap(arr_swap)
-        coll_keys[0].swap(arr_keys)
-        coll_refn[0].swap(arr_refn)
-        # Print
-        cdef int nc, nm
-        nc = coll_refn[0].numberOfOnes()
-        nm = coll_keys[0].numberOfOnes()
-        cdef tuple nout = (nc, nm)
-        if verbose == 1:
-            print("{: 10d}/{: 10d} collisions at coarse refinement.  ({: 10.5f}%)".format(nc,nm,100.0*float(nc)/nm))
-        return nout
-
-    cdef tuple _find_collisions_refined(self, BoolArrayCollection coll, bint verbose = 0):
-        cdef np.int32_t ifile
-        cdef ewah_bool_array iarr, arr_two, arr_swap
-        cdef ewah_bool_array* coll_refn
-        cdef cmap[np.uint64_t, ewah_bool_array] map_keys, map_refn
-        cdef cmap[np.uint64_t, ewah_bool_array]* coll_coll
-        cdef cmap[np.uint64_t, ewah_bool_array]* map_bitmask
-        coll_refn = <ewah_bool_array*> coll.ewah_refn
-        if coll_refn[0].numberOfOnes() == 0:
-            if verbose == 1:
-                print("{: 10d}/{: 10d} collisions at refined refinement. ({: 10.5f}%)".format(0,0,0))
-            return (0,0)
-        coll_coll = <cmap[np.uint64_t, ewah_bool_array]*> coll.ewah_coll
-        for ifile in range(self.nfiles):
-            map_bitmask = (<cmap[np.uint64_t, ewah_bool_array]**> self.ewah_coll)[ifile]
-            for it_mi1 in map_bitmask[0]:
-                mi1 = it_mi1.first
-                iarr = it_mi1.second
-                map_keys[mi1].logicaland(iarr, arr_two)
-                map_keys[mi1].logicalor(iarr, arr_swap)
-                map_keys[mi1].swap(arr_swap)
-                map_refn[mi1].logicalor(arr_two, arr_swap)
-                map_refn[mi1].swap(arr_swap)
-        coll_coll[0] = map_refn
-        # Count
-        cdef int nc, nm
-        nc = 0
-        nm = 0
-        for it_mi1 in map_refn:
-            mi1 = it_mi1.first
-            iarr = it_mi1.second
-            nc += iarr.numberOfOnes()
-            iarr = map_keys[mi1]
-            nm += iarr.numberOfOnes()
-        cdef tuple nout = (nc, nm)
-        # Print
-        if verbose == 1:
-            if nm == 0:
-                print("{: 10d}/{: 10d} collisions at refined refinement. ({: 10.5f}%)".format(nc,nm,0.0))
-            else:
-                print("{: 10d}/{: 10d} collisions at refined refinement. ({: 10.5f}%)".format(nc,nm,100.0*float(nc)/nm))
-        return nout
-
-    cdef void _set(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2 = FLAG):
-        cdef ewah_bool_array *ewah_keys = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef ewah_bool_array *ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_map *ewah_coll = (<ewah_map **> self.ewah_coll)[ifile]
-        ewah_keys[0].set(i1)
-        if i2 != FLAG:
-            ewah_refn[0].set(i1)
-            ewah_coll[0][i1].set(i2)
-
-    cdef void _set_coarse(self, np.uint32_t ifile, np.uint64_t i1):
-        cdef ewah_bool_array *ewah_keys = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        ewah_keys[0].set(i1)
-
-    cdef void _set_refined(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2):
-        cdef ewah_bool_array *ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_map *ewah_coll = (<ewah_map **> self.ewah_coll)[ifile]
-        ewah_refn[0].set(i1)
-        ewah_coll[0][i1].set(i2)
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    cdef void _set_coarse_array(self, np.uint32_t ifile, np.uint8_t[:] arr):
-        cdef ewah_bool_array *ewah_keys = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef np.uint64_t i1
-        for i1 in range(arr.shape[0]):
-            if arr[i1] == 1:
-                ewah_keys[0].set(i1)
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    cdef void _set_refined_array(self, np.uint32_t ifile, np.uint64_t i1, np.uint8_t[:] arr):
-        cdef ewah_bool_array *ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_map *ewah_coll = (<ewah_map **> self.ewah_coll)[ifile]
-        cdef np.uint64_t i2
-        for i2 in range(arr.shape[0]):
-            if arr[i2] == 1:
-                ewah_refn[0].set(i1)
-                ewah_coll[0][i1].set(i2)
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    cdef void _set_refined_index_array(self, np.uint32_t ifile, np.int64_t nsub_mi,
-                                       np.ndarray[np.uint64_t, ndim=1] sub_mi1,
-                                       np.ndarray[np.uint64_t, ndim=1] sub_mi2):
-        cdef np.ndarray[np.int64_t, ndim=1] ind = np.lexsort((sub_mi2[:nsub_mi],
-                                                              sub_mi1[:nsub_mi]))
-        cdef np.int64_t i, p
-        cdef BoolArrayCollection temp
-        if self._count_refined(ifile) == 0:
-            # Add to file bitmask in order
-            for i in range(nsub_mi):
-                p = ind[i]
-                self._set_refined(ifile, sub_mi1[p], sub_mi2[p])
-        else:
-            # Add to dummy bitmask in order, then combine
-            temp = BoolArrayCollection()
-            for i in range(nsub_mi):
-                p = ind[i]
-                temp._set_coarse(sub_mi1[p])
-                temp._set_refined(sub_mi1[p], sub_mi2[p])
-                self._append(ifile, temp)
-
-    cdef void _set_map(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2):
-        cdef ewah_map *ewah_coll = (<ewah_map **> self.ewah_coll)[ifile]
-        ewah_coll[0][i1].set(i2)
-
-    cdef void _set_refn(self, np.uint32_t ifile, np.uint64_t i1):
-        cdef ewah_bool_array *ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        ewah_refn[0].set(i1)
-
-    cdef bint _get(self, np.uint32_t ifile, np.uint64_t i1, np.uint64_t i2 = FLAG):
-        cdef ewah_bool_array *ewah_keys = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef ewah_bool_array *ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_map *ewah_coll = (<ewah_map **> self.ewah_coll)[ifile]
-        if (ewah_keys[0].get(i1) == 0): return 0
-        if (i2 == FLAG) or (ewah_refn[0].get(i1) == 0):
-            return 1
-        return ewah_coll[0][i1].get(i2)
-
-    cdef bint _get_coarse(self, np.uint32_t ifile, np.uint64_t i1):
-        cdef ewah_bool_array *ewah_keys = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        return ewah_keys[0].get(i1)
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    cdef void _get_coarse_array(self, np.uint32_t ifile, np.uint64_t imax,
-                                np.uint8_t[:] arr) except *:
-        cdef ewah_bool_array *ewah_keys = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_keys[0].begin())
-        cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_keys[0].end())
-        cdef np.uint64_t iset
-        while iter_set[0] != iter_end[0]:
-            iset = dereference(iter_set[0])
-            if iset >= imax:
-                raise IndexError("Index {} exceedes max {}.".format(iset, imax))
-            arr[iset] = 1
-            preincrement(iter_set[0])
-
-    cdef bint _isref(self, np.uint32_t ifile, np.uint64_t i):
-        cdef ewah_bool_array *ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        return ewah_refn[0].get(i)
-
-    def count_coarse(self, ifile):
-        return self._count_coarse(ifile)
-
-    def count_total(self, ifile):
-        return self._count_total(ifile)
-
-    def count_refined(self, ifile):
-        return self._count_refined(ifile)
-
-    cdef np.uint64_t _count_coarse(self, np.uint32_t ifile):
-        return self._count_total(ifile) - self._count_refined(ifile)
-
-    cdef np.uint64_t _count_total(self, np.uint32_t ifile):
-        cdef ewah_bool_array *ewah_keys = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef np.uint64_t out = ewah_keys[0].numberOfOnes()
-        return out
-
-    cdef np.uint64_t _count_refined(self, np.uint32_t ifile):
-        cdef ewah_bool_array *ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef np.uint64_t out = ewah_refn[0].numberOfOnes()
-        return out
-
-    def append(self, np.uint32_t ifile, BoolArrayCollection solf):
-        if solf is None: return
-        self._append(ifile, solf)
-
-    cdef void _append(self, np.uint32_t ifile, BoolArrayCollection solf):
-        cdef ewah_bool_array *ewah_keys1 = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef ewah_bool_array *ewah_refn1 = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_map *ewah_coll1 = (<ewah_map **> self.ewah_coll)[ifile]
-        cdef ewah_bool_array *ewah_keys2 = <ewah_bool_array *> solf.ewah_keys
-        cdef ewah_bool_array *ewah_refn2 = <ewah_bool_array *> solf.ewah_refn
-        cdef ewahmap *ewah_coll2 = <ewahmap *> solf.ewah_coll
-        cdef ewahmap_it it_map1, it_map2
-        cdef ewah_bool_array swap, mi1_ewah1, mi1_ewah2
-        cdef np.uint64_t mi1
-        # Keys
-        ewah_keys1[0].logicalor(ewah_keys2[0], swap)
-        ewah_keys1[0].swap(swap)
-        # Refined
-        ewah_refn1[0].logicalor(ewah_refn2[0], swap)
-        ewah_refn1[0].swap(swap)
-        # Map
-        it_map2 = ewah_coll2[0].begin()
-        while it_map2 != ewah_coll2[0].end():
-            mi1 = dereference(it_map2).first
-            mi1_ewah2 = dereference(it_map2).second
-            it_map1 = ewah_coll1[0].find(mi1)
-            if it_map1 == ewah_coll1[0].end():
-                ewah_coll1[0][mi1] = mi1_ewah2
-            else:
-                mi1_ewah1 = dereference(it_map1).second
-                mi1_ewah1.logicalor(mi1_ewah2, swap)
-                mi1_ewah1.swap(swap)
-            preincrement(it_map2)
-
-    cdef bint _intersects(self, np.uint32_t ifile, BoolArrayCollection solf):
-        cdef ewah_bool_array *ewah_keys1 = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef ewah_bool_array *ewah_refn1 = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_map *ewah_coll1 = (<ewah_map **> self.ewah_coll)[ifile]
-        cdef ewah_bool_array *ewah_keys2 = <ewah_bool_array *> solf.ewah_keys
-        cdef ewah_bool_array *ewah_refn2 = <ewah_bool_array *> solf.ewah_refn
-        cdef ewahmap *ewah_coll2 = <ewahmap *> solf.ewah_coll
-        cdef ewahmap_it it_map1, it_map2
-        cdef ewah_bool_array mi1_ewah1, mi1_ewah2
-        cdef np.uint64_t mi1
-        cdef ewah_bool_array ewah_coar1, ewah_coar2
-        # No intersection
-        if ewah_keys1[0].intersects(ewah_keys2[0]) == 0:
-            return 0
-        # Intersection at coarse level
-        ewah_keys1[0].logicalxor(ewah_refn1[0],ewah_coar1)
-        ewah_keys2[0].logicalxor(ewah_refn2[0],ewah_coar2)
-        if ewah_coar1.intersects(ewah_keys2[0]) == 1:
-            return 1
-        if ewah_coar2.intersects(ewah_keys1[0]) == 1:
-            return 1
-        # Intersection at refined level
-        if ewah_refn1[0].intersects(ewah_refn2[0]) == 1:
-            it_map1 = ewah_coll1[0].begin()
-            while (it_map1 != ewah_coll1[0].end()):
-                mi1 = dereference(it_map1).first
-                it_map2 = ewah_coll2[0].find(mi1)
-                if it_map2 != ewah_coll2[0].end():
-                    mi1_ewah1 = dereference(it_map1).second
-                    mi1_ewah2 = dereference(it_map2).second
-                    if mi1_ewah1.intersects(mi1_ewah2):
-                        return 1
-                preincrement(it_map1)
-        return 0
-
-    cdef void _logicalxor(self, np.uint32_t ifile, BoolArrayCollection solf, BoolArrayCollection out):
-        cdef ewah_bool_array *ewah_keys1 = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef ewah_bool_array *ewah_refn1 = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_map *ewah_coll1 = (<ewah_map **> self.ewah_coll)[ifile]
-        cdef ewah_bool_array *ewah_keys2 = <ewah_bool_array *> solf.ewah_keys
-        cdef ewah_bool_array *ewah_refn2 = <ewah_bool_array *> solf.ewah_refn
-        cdef ewahmap *ewah_coll2 = <ewahmap *> solf.ewah_coll
-        cdef ewah_bool_array *ewah_keys_out = <ewah_bool_array *> out.ewah_keys
-        cdef ewah_bool_array *ewah_refn_out = <ewah_bool_array *> out.ewah_refn
-        cdef ewah_map *ewah_coll_out = <ewah_map *> out.ewah_coll
-        cdef ewahmap_it it_map1, it_map2
-        cdef ewah_bool_array mi1_ewah1, mi1_ewah2, swap
-        cdef np.uint64_t mi1
-        # Keys
-        ewah_keys1[0].logicalxor(ewah_keys2[0],ewah_keys_out[0])
-        # Refn
-        ewah_refn1[0].logicalxor(ewah_refn2[0],ewah_refn_out[0])
-        # Coll
-        it_map1 = ewah_coll1[0].begin()
-        while (it_map1 != ewah_coll1[0].end()):
-            mi1 = dereference(it_map1).first
-            mi1_ewah1 = dereference(it_map1).second
-            it_map2 = ewah_coll2[0].find(mi1)
-            if it_map2 == ewah_coll2[0].end():
-                ewah_coll_out[0][mi1] = mi1_ewah1
-            else:
-                mi1_ewah2 = dereference(it_map2).second
-                mi1_ewah1.logicalxor(mi1_ewah2, swap)
-                ewah_coll_out[0][mi1] = swap
-            preincrement(it_map1)
-        it_map2 = ewah_coll2[0].begin()
-        while (it_map2 != ewah_coll2[0].end()):
-            mi1 = dereference(it_map2).first
-            mi1_ewah2 = dereference(it_map2).second
-            it_map1 = ewah_coll1[0].find(mi1)
-            if it_map1 == ewah_coll1[0].end():
-                ewah_coll_out[0][mi1] = mi1_ewah2
-            preincrement(it_map2)
-
-    def logicalxor(self, ifile, solf, out):
-        return self._logicalxor(ifile, solf, out)
-
-    cdef void _logicaland(self, np.uint32_t ifile, BoolArrayCollection solf, BoolArrayCollection out):
-        cdef ewah_bool_array *ewah_keys1 = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef ewah_bool_array *ewah_refn1 = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_map *ewah_coll1 = (<ewah_map **> self.ewah_coll)[ifile]
-        cdef ewah_bool_array *ewah_keys2 = <ewah_bool_array *> solf.ewah_keys
-        cdef ewah_bool_array *ewah_refn2 = <ewah_bool_array *> solf.ewah_refn
-        cdef ewahmap *ewah_coll2 = <ewahmap *> solf.ewah_coll
-        cdef ewah_bool_array *ewah_keys_out = <ewah_bool_array *> out.ewah_keys
-        cdef ewah_bool_array *ewah_refn_out = <ewah_bool_array *> out.ewah_refn
-        cdef ewah_map *ewah_coll_out = <ewah_map *> out.ewah_coll
-        cdef ewahmap_it it_map1, it_map2
-        cdef ewah_bool_array mi1_ewah1, mi1_ewah2, swap
-        cdef np.uint64_t mi1
-        # Keys
-        ewah_keys1[0].logicaland(ewah_keys2[0],ewah_keys_out[0])
-        # Refn
-        ewah_refn1[0].logicaland(ewah_refn2[0],ewah_refn_out[0])
-        # Coll
-        if ewah_refn_out[0].numberOfOnes() > 0:
-            it_map1 = ewah_coll1[0].begin()
-            while (it_map1 != ewah_coll1[0].end()):
-                mi1 = dereference(it_map1).first
-                it_map2 = ewah_coll2[0].find(mi1)
-                if it_map2 != ewah_coll2[0].end():
-                    mi1_ewah1 = dereference(it_map1).second
-                    mi1_ewah2 = dereference(it_map2).second
-                    mi1_ewah1.logicaland(mi1_ewah2, swap)
-                    ewah_coll_out[0][mi1] = swap
-                preincrement(it_map1)
-
-    def logicaland(self, ifile, solf, out):
-        return self._logicaland(ifile, solf, out)
-
-    cdef void _select_contaminated(self, np.uint32_t ifile,
-                                   BoolArrayCollection mask, np.uint8_t[:] out,
-                                   np.uint8_t[:] secondary_files,
-                                   BoolArrayCollection mask2 = None):
-        # Fill mask at indices owned by this file that are also contaminated by
-        # other files.
-        cdef ewah_bool_array *ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_bool_array ewah_mask
-        cdef ewah_bool_array *ewah_mask1
-        cdef ewah_bool_array *ewah_mask2
-        cdef ewah_bool_array ewah_slct
-        cdef ewah_bool_array *ewah_file
-        cdef np.uint64_t iset
-        # Merge masks as necessary
-        if mask2 is None:
-            ewah_mask = (<ewah_bool_array *> mask.ewah_keys)[0]
-        else:
-            ewah_mask1 = <ewah_bool_array *> mask.ewah_keys
-            ewah_mask2 = <ewah_bool_array *> mask2.ewah_keys
-            ewah_mask1[0].logicalor(ewah_mask2[0],ewah_mask)
-        # Get just refined cells owned by this file
-        ewah_mask.logicaland(ewah_refn[0], ewah_slct)
-        # Set array values
-        cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_slct.begin())
-        cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_slct.end())
-        while iter_set[0] != iter_end[0]:
-            iset = dereference(iter_set[0])
-            out[iset] = 1
-            preincrement(iter_set[0])
-        # Find files that intersect this one
-        cdef np.uint32_t isfile
-        for isfile in range(self.nfiles):
-            if isfile == ifile: continue
-            ewah_file = (<ewah_bool_array **> self.ewah_keys)[isfile]
-            if ewah_slct.intersects(ewah_file[0]) == 1:
-                secondary_files[isfile] = 1
-
-    cdef void _select_uncontaminated(self, np.uint32_t ifile,
-                                     BoolArrayCollection mask, np.uint8_t[:] out,
-                                     BoolArrayCollection mask2 = None):
-        # Fill mask at indices that are owned by this file and no other.
-        cdef ewah_bool_array *ewah_keys = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef ewah_bool_array *ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_bool_array ewah_mask
-        cdef ewah_bool_array *ewah_mask1
-        cdef ewah_bool_array *ewah_mask2
-        cdef ewah_bool_array ewah_slct
-        cdef ewah_bool_array ewah_coar
-        cdef np.uint64_t iset
-        # Merge masks if necessary
-        if mask2 is None:
-            ewah_mask = (<ewah_bool_array *> mask.ewah_keys)[0]
-        else:
-            ewah_mask1 = <ewah_bool_array *> mask.ewah_keys
-            ewah_mask2 = <ewah_bool_array *> mask2.ewah_keys
-            ewah_mask1[0].logicalor(ewah_mask2[0],ewah_mask)
-        # Get coarse cells owned by this file
-        ewah_keys[0].logicalxor(ewah_refn[0],ewah_coar)
-        ewah_coar.logicaland(ewah_mask,ewah_slct)
-        # Set array elements
-        cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_slct.begin())
-        cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_slct.end())
-        while iter_set[0] != iter_end[0]:
-            iset = dereference(iter_set[0])
-            out[iset] = 1
-            preincrement(iter_set[0])
-
-    cdef bytes _dumps(self, np.uint32_t ifile):
-        # TODO: write word size
-        cdef sstream ss
-        cdef ewah_bool_array *ewah_keys = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef ewah_bool_array *ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_map *ewah_coll = (<ewah_map **> self.ewah_coll)[ifile]
-        cdef ewahmap_it it_map
-        cdef np.uint64_t nrefn, mi1
-        cdef ewah_bool_array mi1_ewah
-        # Write mi1 ewah & refinement ewah
-        ewah_keys[0].write(ss,1)
-        ewah_refn[0].write(ss,1)
-        # Number of refined bool arrays
-        nrefn = <np.uint64_t>(ewah_refn[0].numberOfOnes())
-        ss.write(<const char *> &nrefn, sizeof(nrefn))
-        # Loop over refined bool arrays
-        it_map = ewah_coll[0].begin()
-        while it_map != ewah_coll[0].end():
-            mi1 = dereference(it_map).first
-            mi1_ewah = dereference(it_map).second
-            ss.write(<const char *> &mi1, sizeof(mi1))
-            mi1_ewah.write(ss,1)
-            preincrement(it_map)
-        # Return type cast python bytes string
-        return <bytes>ss.str()
-
-    cdef bint _loads(self, np.uint32_t ifile, bytes s):
-        # TODO: write word size
-        cdef sstream ss
-        cdef ewah_bool_array *ewah_keys = (<ewah_bool_array **> self.ewah_keys)[ifile]
-        cdef ewah_bool_array *ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-        cdef ewah_map *ewah_coll = (<ewah_map **> self.ewah_coll)[ifile]
-        cdef np.uint64_t nrefn, mi1
-        nrefn = mi1 = 0
-        # Write string to string stream
-        if len(s) == 0: return 1
-        ss.write(s, len(s))
-        # Read keys and refinement arrays
-        ewah_keys[0].read(ss,1)
-        if ss.eof(): return 1
-        ewah_refn[0].read(ss,1)
-        # Read and check number of refined cells
-        ss.read(<char *> (&nrefn), sizeof(nrefn))
-        if nrefn != ewah_refn[0].numberOfOnes():
-            raise Exception("Error in read. File indicates {} refinements, but bool array has {}.".format(nrefn,ewah_refn[0].numberOfOnes()))
-        # Loop over refined cells
-        for _ in range(nrefn):
-            ss.read(<char *> (&mi1), sizeof(mi1))
-            if ss.eof(): return 1
-            ewah_coll[0][mi1].read(ss,1)
-            # or...
-            #mi1_ewah.read(ss,1)
-            #ewah_coll[0][mi1].swap(mi1_ewah)
-        return 1
-
-    cdef bint _check(self):
-        cdef np.uint32_t ifile
-        cdef ewah_bool_array *ewah_keys
-        cdef ewah_bool_array *ewah_refn
-        cdef ewah_bool_array tmp1, tmp2
-        cdef np.uint64_t nchk
-        cdef str msg
-        # Check individual files
-        for ifile in range(self.nfiles):
-            ewah_keys = (<ewah_bool_array **> self.ewah_keys)[ifile]
-            ewah_refn = (<ewah_bool_array **> self.ewah_refn)[ifile]
-            # Check that there are not any refn that are not keys
-            ewah_keys[0].logicalxor(ewah_refn[0], tmp1)
-            ewah_refn[0].logicaland(tmp1, tmp2)
-            nchk = tmp2.numberOfOnes()
-            if nchk > 0:
-                msg = "File {}: There are {} refined cells that are not set on coarse level.".format(ifile,nchk)
-                print(msg)
-                return 0
-                # raise Exception(msg)
-        return 1
-
-    def check(self):
-        return self._check()
-
-    def __dealloc__(self):
-        for ifile in range(self.nfiles):
-            del self.ewah_keys[ifile]
-            del self.ewah_refn[ifile]
-            del self.ewah_coll[ifile]
-
-    def print_info(self, ifile, prefix=''):
-        print("{}{: 8d} coarse, {: 8d} refined, {: 8d} total".format(
-            prefix,
-            self._count_coarse(ifile),
-            self._count_refined(ifile),
-            self._count_total(ifile)))
-
-cdef class BoolArrayCollection:
-
-    def __cinit__(self):
-        self.ewah_keys = new ewah_bool_array()
-        self.ewah_refn = new ewah_bool_array()
-        self.ewah_coar = new ewah_bool_array()
-        self.ewah_coll = new ewah_map()
-
-    cdef void _reset(self):
-        self.ewah_keys[0].reset()
-        self.ewah_refn[0].reset()
-        self.ewah_coar[0].reset()
-        self.ewah_coll[0].clear()
-
-    cdef int _richcmp(self, BoolArrayCollection solf, int op) except -1:
-
-        cdef ewah_bool_array *arr1
-        cdef ewah_bool_array *arr2
-        cdef ewahmap *map1
-        cdef ewahmap *map2
-        cdef ewahmap_it it_map1, it_map2
-        # ==
-        if op == 2:
-            # Keys
-            arr1 = <ewah_bool_array *> self.ewah_keys
-            arr2 = <ewah_bool_array *> solf.ewah_keys
-            if arr1[0] != arr2[0]:
-                return 0
-            # Refn
-            arr1 = <ewah_bool_array *> self.ewah_refn
-            arr2 = <ewah_bool_array *> solf.ewah_refn
-            if arr1[0] != arr2[0]:
-                return 0
-            # Map
-            map1 = <ewahmap *> self.ewah_coll
-            map2 = <ewahmap *> solf.ewah_coll
-            it_map1 = map1[0].begin()
-            while (it_map1 != map1[0].end()):
-                it_map2 = map2[0].find(dereference(it_map1).first)
-                if it_map2 == map2[0].end():
-                    return 0
-                if dereference(it_map1).second != dereference(it_map2).second:
-                    return 0
-                preincrement(it_map1)
-            it_map2 =map2[0].begin()
-            while (it_map2 != map2[0].end()):
-                it_map1 = map1[0].find(dereference(it_map2).first)
-                if it_map1 == map1[0].end():
-                    return 0
-                if dereference(it_map2).second != dereference(it_map1).second:
-                    return 0
-                preincrement(it_map2)
-            # Match
-            return 1
-        # !=
-        elif op == 3:
-            if self._richcmp(solf, 2) == 1:
-                return 0
-            return 1
-        else:
-            return -1
-            # options = ['<','<=','==','!=','>','>=']
-            # raise NotImplementedError("Operator {} is not yet implemented.".format(options[op]))
-
-    def __richcmp__(BoolArrayCollection self, BoolArrayCollection solf, int op):
-        if self._richcmp(solf, op) == 1:
-            return True
-        else:
-            return False
-
-    cdef void _set(self, np.uint64_t i1, np.uint64_t i2 = FLAG):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        ewah_keys[0].set(i1)
-        # Note the 0 here, for dereferencing
-        if i2 != FLAG:
-            ewah_refn[0].set(i1)
-            ewah_coll[0][i1].set(i2)
-
-    def set(self, i1, i2 = FLAG):
-        self._set(i1, i2)
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    def set_from(self, np.uint64_t[:] ids):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef np.uint64_t i
-        cdef np.uint64_t last = 0
-        for i in range(ids.shape[0]):
-            if ids[i] < last:
-                raise RuntimeError
-            self._set(ids[i])
-            last = ids[i]
-        print("Set from %s array and ended up with %s bytes" % (
-            ids.size, ewah_keys[0].sizeInBytes()))
-
-    cdef void _set_coarse(self, np.uint64_t i1):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        ewah_keys[0].set(i1)
-
-    def set_coarse(self, i1):
-        return self._set_coarse(i1)
-
-    cdef void _set_refined(self, np.uint64_t i1, np.uint64_t i2):
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        # Note the 0 here, for dereferencing
-        ewah_refn[0].set(i1)
-        ewah_coll[0][i1].set(i2)
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    cdef void _set_coarse_array(self, np.uint8_t[:] arr):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef np.uint64_t i1
-        for i1 in range(arr.shape[0]):
-            if arr[i1] == 1:
-                ewah_keys[0].set(i1)
-                # self._set_coarse(i1)
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    cdef void _set_refined_array(self, np.uint64_t i1, np.uint8_t[:] arr):
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        cdef np.uint64_t i2
-        for i2 in range(arr.shape[0]):
-            if arr[i2] == 1:
-                ewah_refn[0].set(i1)
-                ewah_coll[0][i1].set(i2)
-                # self._set_refined(i1, i2)
-
-    def set_refined(self, i1, i2):
-        return self._set_refined(i1, i2)
-
-    cdef void _set_map(self, np.uint64_t i1, np.uint64_t i2):
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        ewah_coll[0][i1].set(i2)
-
-    def set_map(self, i1, i2):
-        self._set_map(i1, i2)
-
-    cdef void _set_refn(self, np.uint64_t i1):
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        ewah_refn[0].set(i1)
-
-    def set_refn(self, i1):
-        self._set_refn(i1)
-
-    cdef bint _get(self, np.uint64_t i1, np.uint64_t i2 = FLAG):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        # Note the 0 here, for dereferencing
-        if (ewah_keys[0].get(i1) == 0): return 0
-        if (ewah_refn[0].get(i1) == 0) or (i2 == FLAG):
-            return 1
-        return ewah_coll[0][i1].get(i2)
-
-    def get(self, i1, i2 = FLAG):
-        return self._get(i1, i2)
-
-    cdef bint _get_coarse(self, np.uint64_t i1):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        return ewah_keys[0].get(i1)
-
-    def get_coarse(self, i1):
-        return self._get_coarse(i1)
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    cdef void _get_coarse_array(self, np.uint64_t imax, np.uint8_t[:] arr) except *:
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_keys[0].begin())
-        cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_keys[0].end())
-        cdef np.uint64_t iset
-        while iter_set[0] != iter_end[0]:
-            iset = dereference(iter_set[0])
-            if iset >= imax:
-                raise IndexError("Index {} exceedes max {}.".format(iset, imax))
-            arr[iset] = 1
-            preincrement(iter_set[0])
-
-    def get_coarse_array(self, imax, arr):
-        return self._get_coarse_array(imax, arr)
-
-    cdef bint _contains(self, np.uint64_t i):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        return ewah_keys[0].get(i)
-
-    def contains(self, np.uint64_t i):
-        return self._contains(i)
-
-    cdef bint _isref(self, np.uint64_t i):
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        return ewah_refn[0].get(i)
-
-    def isref(self, np.uint64_t i):
-        return self._isref(i)
-
-    cdef void _ewah_coarse(self):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewah_bool_array *ewah_coar = <ewah_bool_array *> self.ewah_coar
-        ewah_coar[0].reset()
-        ewah_keys[0].logicalxor(ewah_refn[0],ewah_coar[0])
-        return
-
-    def ewah_coarse(self):
-        return self._ewah_coarse()
-
-    cdef np.uint64_t _count_total(self):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef np.uint64_t out = ewah_keys.numberOfOnes()
-        return out
-
-    def count_total(self):
-        return self._count_total()
-
-    cdef np.uint64_t _count_refined(self):
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef np.uint64_t out = ewah_refn.numberOfOnes()
-        return out
-
-    def count_refined(self):
-        return self._count_refined()
-
-    cdef np.uint64_t _count_coarse(self):
-        self._ewah_coarse()
-        cdef ewah_bool_array *ewah_coar = <ewah_bool_array *> self.ewah_coar
-        cdef np.uint64_t out = ewah_coar.numberOfOnes()
-        return out
-
-    def count_coarse(self):
-        return self._count_coarse()
-
-    cdef void _logicalor(self, BoolArrayCollection solf, BoolArrayCollection out):
-        cdef ewah_bool_array *ewah_keys1 = self.ewah_keys
-        cdef ewah_bool_array *ewah_refn1 = self.ewah_refn
-        cdef ewahmap *ewah_coll1 = self.ewah_coll
-        cdef ewah_bool_array *ewah_keys2 = solf.ewah_keys
-        cdef ewah_bool_array *ewah_refn2 = solf.ewah_refn
-        cdef ewahmap *ewah_coll2 = solf.ewah_coll
-        cdef ewah_bool_array *ewah_keys3 = out.ewah_keys
-        cdef ewah_bool_array *ewah_refn3 = out.ewah_refn
-        cdef ewahmap *ewah_coll3 = out.ewah_coll
-        cdef ewahmap_it it_map1, it_map2
-        cdef ewah_bool_array mi1_ewah1, mi1_ewah2
-        cdef np.uint64_t mi1
-        # Keys
-        ewah_keys1[0].logicalor(ewah_keys2[0], ewah_keys3[0])
-        # Refined
-        ewah_refn1[0].logicalor(ewah_refn2[0], ewah_refn3[0])
-        # Map
-        it_map1 = ewah_coll1[0].begin()
-        while it_map1 != ewah_coll1[0].end():
-            mi1 = dereference(it_map1).first
-            mi1_ewah1 = dereference(it_map1).second
-            ewah_coll3[0][mi1] = mi1_ewah1
-            preincrement(it_map1)
-        it_map2 = ewah_coll2[0].begin()
-        while it_map2 != ewah_coll2[0].end():
-            mi1 = dereference(it_map2).first
-            mi1_ewah2 = dereference(it_map2).second
-            it_map1 = ewah_coll1[0].find(mi1)
-            if it_map1 != ewah_coll1[0].end():
-                mi1_ewah1 = dereference(it_map1).second
-                mi1_ewah1.logicalor(mi1_ewah2, ewah_coll3[0][mi1])
-            else:
-                ewah_coll3[0][mi1] = mi1_ewah2
-            preincrement(it_map2)
-
-    cdef void _append(self, BoolArrayCollection solf):
-        cdef ewah_bool_array *ewah_keys1 = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn1 = <ewah_bool_array *> self.ewah_refn
-        cdef ewahmap *ewah_coll1 = <ewahmap *> self.ewah_coll
-        cdef ewah_bool_array *ewah_keys2 = <ewah_bool_array *> solf.ewah_keys
-        cdef ewah_bool_array *ewah_refn2 = <ewah_bool_array *> solf.ewah_refn
-        cdef ewahmap *ewah_coll2 = <ewahmap *> solf.ewah_coll
-        cdef ewahmap_it it_map1, it_map2
-        cdef ewah_bool_array swap, mi1_ewah1, mi1_ewah2
-        cdef np.uint64_t mi1
-        # Keys
-        ewah_keys1[0].logicalor(ewah_keys2[0], swap)
-        ewah_keys1[0].swap(swap)
-        # Refined
-        ewah_refn1[0].logicalor(ewah_refn2[0], swap)
-        ewah_refn1[0].swap(swap)
-        # Map
-        it_map2 = ewah_coll2[0].begin()
-        while it_map2 != ewah_coll2[0].end():
-            mi1 = dereference(it_map2).first
-            mi1_ewah2 = dereference(it_map2).second
-            it_map1 = ewah_coll1[0].find(mi1)
-            if it_map1 == ewah_coll1[0].end():
-                ewah_coll1[0][mi1] = mi1_ewah2
-            else:
-                mi1_ewah1 = dereference(it_map1).second
-                mi1_ewah1.logicalor(mi1_ewah2, swap)
-                mi1_ewah1.swap(swap)
-            preincrement(it_map2)
-
-    def append(self, solf):
-        return self._append(solf)
-
-    cdef bint _intersects(self, BoolArrayCollection solf):
-        cdef ewah_bool_array *ewah_keys1 = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn1 = <ewah_bool_array *> self.ewah_refn
-        cdef ewahmap *ewah_coll1 = <ewahmap *> self.ewah_coll
-        cdef ewah_bool_array *ewah_keys2 = <ewah_bool_array *> solf.ewah_keys
-        cdef ewah_bool_array *ewah_refn2 = <ewah_bool_array *> solf.ewah_refn
-        cdef ewahmap *ewah_coll2 = <ewahmap *> solf.ewah_coll
-        cdef ewahmap_it it_map1, it_map2
-        cdef ewah_bool_array mi1_ewah1, mi1_ewah2
-        cdef np.uint64_t mi1
-        cdef ewah_bool_array ewah_coar1, ewah_coar2
-        # No intersection
-        if ewah_keys1[0].intersects(ewah_keys2[0]) == 0:
-            return 0
-        # Intersection at coarse level
-        ewah_keys1[0].logicalxor(ewah_refn1[0],ewah_coar1)
-        ewah_keys2[0].logicalxor(ewah_refn2[0],ewah_coar2)
-        if ewah_coar1.intersects(ewah_keys2[0]) == 1:
-            return 1
-        if ewah_coar2.intersects(ewah_keys1[0]) == 1:
-            return 1
-        # Intersection at refined level
-        if ewah_refn1[0].intersects(ewah_refn2[0]) == 1:
-            it_map1 = ewah_coll1[0].begin()
-            while (it_map1 != ewah_coll1[0].end()):
-                mi1 = dereference(it_map1).first
-                it_map2 = ewah_coll2[0].find(mi1)
-                if it_map2 != ewah_coll2[0].end():
-                    mi1_ewah1 = dereference(it_map1).second
-                    mi1_ewah2 = dereference(it_map2).second
-                    if mi1_ewah1.intersects(mi1_ewah2):
-                        return 1
-                preincrement(it_map1)
-        return 0
-
-    cdef void _logicalxor(self, BoolArrayCollection solf, BoolArrayCollection out):
-        cdef ewah_bool_array *ewah_keys1 = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn1 = <ewah_bool_array *> self.ewah_refn
-        cdef ewah_map *ewah_coll1 = <ewah_map *> self.ewah_coll
-        cdef ewah_bool_array *ewah_keys2 = <ewah_bool_array *> solf.ewah_keys
-        cdef ewah_bool_array *ewah_refn2 = <ewah_bool_array *> solf.ewah_refn
-        cdef ewahmap *ewah_coll2 = <ewahmap *> solf.ewah_coll
-        cdef ewah_bool_array *ewah_keys_out = <ewah_bool_array *> out.ewah_keys
-        cdef ewah_bool_array *ewah_refn_out = <ewah_bool_array *> out.ewah_refn
-        cdef ewah_map *ewah_coll_out = <ewah_map *> out.ewah_coll
-        cdef ewahmap_it it_map1, it_map2
-        cdef ewah_bool_array mi1_ewah1, mi1_ewah2, swap
-        cdef np.uint64_t mi1
-        # Keys
-        ewah_keys1[0].logicalxor(ewah_keys2[0],ewah_keys_out[0])
-        # Refn
-        ewah_refn1[0].logicalxor(ewah_refn2[0],ewah_refn_out[0])
-        # Coll
-        it_map1 = ewah_coll1[0].begin()
-        while (it_map1 != ewah_coll1[0].end()):
-            mi1 = dereference(it_map1).first
-            mi1_ewah1 = dereference(it_map1).second
-            it_map2 = ewah_coll2[0].find(mi1)
-            if it_map2 == ewah_coll2[0].end():
-                ewah_coll_out[0][mi1] = mi1_ewah1
-            else:
-                mi1_ewah2 = dereference(it_map2).second
-                mi1_ewah1.logicalxor(mi1_ewah2, swap)
-                ewah_coll_out[0][mi1] = swap
-            preincrement(it_map1)
-        it_map2 = ewah_coll2[0].begin()
-        while (it_map2 != ewah_coll2[0].end()):
-            mi1 = dereference(it_map2).first
-            mi1_ewah2 = dereference(it_map2).second
-            it_map1 = ewah_coll1[0].find(mi1)
-            if it_map1 == ewah_coll1[0].end():
-                ewah_coll_out[0][mi1] = mi1_ewah2
-            preincrement(it_map2)
-
-    def logicalxor(self, solf, out):
-        return self._logicalxor(solf, out)
-
-    cdef void _logicaland(self, BoolArrayCollection solf, BoolArrayCollection out):
-        cdef ewah_bool_array *ewah_keys1 = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn1 = <ewah_bool_array *> self.ewah_refn
-        cdef ewah_map *ewah_coll1 = <ewah_map *> self.ewah_coll
-        cdef ewah_bool_array *ewah_keys2 = <ewah_bool_array *> solf.ewah_keys
-        cdef ewah_bool_array *ewah_refn2 = <ewah_bool_array *> solf.ewah_refn
-        cdef ewahmap *ewah_coll2 = <ewahmap *> solf.ewah_coll
-        cdef ewah_bool_array *ewah_keys_out = <ewah_bool_array *> out.ewah_keys
-        cdef ewah_bool_array *ewah_refn_out = <ewah_bool_array *> out.ewah_refn
-        cdef ewah_map *ewah_coll_out = <ewah_map *> out.ewah_coll
-        cdef ewahmap_it it_map1, it_map2
-        cdef ewah_bool_array mi1_ewah1, mi1_ewah2, swap
-        cdef np.uint64_t mi1
-        # Keys
-        ewah_keys1[0].logicaland(ewah_keys2[0],ewah_keys_out[0])
-        # Refn
-        ewah_refn1[0].logicaland(ewah_refn2[0],ewah_refn_out[0])
-        # Coll
-        if ewah_refn_out[0].numberOfOnes() > 0:
-            it_map1 = ewah_coll1[0].begin()
-            while (it_map1 != ewah_coll1[0].end()):
-                mi1 = dereference(it_map1).first
-                mi1_ewah1 = dereference(it_map1).second
-                it_map2 = ewah_coll2[0].find(mi1)
-                if it_map2 != ewah_coll2[0].end():
-                    mi1_ewah2 = dereference(it_map2).second
-                    mi1_ewah1.logicaland(mi1_ewah2, swap)
-                    ewah_coll_out[0][mi1] = swap
-                preincrement(it_map1)
-
-    def logicaland(self, solf, out):
-        return self._logicaland(solf, out)
-
-    cdef void _select_contaminated(self, BoolArrayCollection mask, np.uint8_t[:] out,
-                                   BoolArrayCollection mask2 = None):
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewah_bool_array ewah_mask
-        cdef ewah_bool_array *ewah_mask1
-        cdef ewah_bool_array *ewah_mask2
-        if mask2 is None:
-            ewah_mask = (<ewah_bool_array *> mask.ewah_keys)[0]
-        else:
-            ewah_mask1 = <ewah_bool_array *> mask.ewah_keys
-            ewah_mask2 = <ewah_bool_array *> mask2.ewah_keys
-            ewah_mask1[0].logicalor(ewah_mask2[0],ewah_mask)
-        cdef ewah_bool_array ewah_slct
-        ewah_refn[0].logicaland(ewah_mask,ewah_slct)
-        cdef np.uint64_t iset
-        cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_slct.begin())
-        cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_slct.end())
-        while iter_set[0] != iter_end[0]:
-            iset = dereference(iter_set[0])
-            out[iset] = 1
-            preincrement(iter_set[0])
-
-    cdef void _select_uncontaminated(self, BoolArrayCollection mask, np.uint8_t[:] out,
-                                     BoolArrayCollection mask2 = None):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewah_bool_array ewah_mask
-        cdef ewah_bool_array *ewah_mask1
-        cdef ewah_bool_array *ewah_mask2
-        if mask2 is None:
-            ewah_mask = (<ewah_bool_array *> mask.ewah_keys)[0]
-        else:
-            ewah_mask1 = <ewah_bool_array *> mask.ewah_keys
-            ewah_mask2 = <ewah_bool_array *> mask2.ewah_keys
-            ewah_mask1[0].logicalor(ewah_mask2[0],ewah_mask)
-        cdef ewah_bool_array ewah_slct
-        cdef ewah_bool_array ewah_coar
-        ewah_keys[0].logicalxor(ewah_refn[0],ewah_coar)
-        ewah_coar.logicaland(ewah_mask,ewah_slct)
-        cdef np.uint64_t iset
-        cdef ewah_bool_iterator *iter_set = new ewah_bool_iterator(ewah_slct.begin())
-        cdef ewah_bool_iterator *iter_end = new ewah_bool_iterator(ewah_slct.end())
-        while iter_set[0] != iter_end[0]:
-            iset = dereference(iter_set[0])
-            out[iset] = 1
-            preincrement(iter_set[0])
-
-    cdef void _get_ghost_zones(self, int ngz, int order1, int order2,
-                               bint periodicity[3], BoolArrayCollection out_ewah,
-                               bint coarse_ghosts = 0):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewahmap *ewah_coll = <ewahmap *> self.ewah_coll
-        cdef ewah_bool_iterator *iter_set1 = new ewah_bool_iterator(ewah_keys.begin())
-        cdef ewah_bool_iterator *iter_end1 = new ewah_bool_iterator(ewah_keys.end())
-        cdef ewah_bool_iterator *iter_set2
-        cdef ewah_bool_iterator *iter_end2
-        cdef np.uint64_t max_index1 = <np.uint64_t>(1 << order1)
-        cdef np.uint64_t max_index2 = <np.uint64_t>(1 << order2)
-        cdef np.uint64_t nele1 = <np.uint64_t>(max_index1**3)
-        cdef np.uint64_t nele2 = <np.uint64_t>(max_index2**3)
-        cdef BoolArrayCollectionUncompressed temp_bool = BoolArrayCollectionUncompressed(nele1, nele2)
-        cdef BoolArrayCollectionUncompressed out_bool = BoolArrayCollectionUncompressed(nele1, nele2)
-        cdef np.uint64_t mi1, mi2, mi1_n, mi2_n
-        cdef np.uint32_t ntot, i
-        cdef void* pointers[7]
-        pointers[0] = malloc( sizeof(np.int32_t) * (2*ngz+1)*3)
-        pointers[1] = malloc( sizeof(np.uint64_t) * (2*ngz+1)*3)
-        pointers[2] = malloc( sizeof(np.uint64_t) * (2*ngz+1)*3)
-        pointers[3] = malloc( sizeof(np.uint64_t) * (2*ngz+1)**3)
-        pointers[4] = malloc( sizeof(np.uint64_t) * (2*ngz+1)**3)
-        pointers[5] = malloc( sizeof(np.uint8_t) * nele1)
-        pointers[6] = malloc( sizeof(np.uint8_t) * nele2)
-        cdef np.uint32_t[:,:] index = <np.uint32_t[:2*ngz+1,:3]> pointers[0]
-        cdef np.uint64_t[:,:] ind1_n = <np.uint64_t[:2*ngz+1,:3]> pointers[1]
-        cdef np.uint64_t[:,:] ind2_n = <np.uint64_t[:2*ngz+1,:3]> pointers[2]
-        cdef np.uint64_t[:] neighbor_list1 = <np.uint64_t[:((2*ngz+1)**3)]> pointers[3]
-        cdef np.uint64_t[:] neighbor_list2 = <np.uint64_t[:((2*ngz+1)**3)]> pointers[4]
-        cdef np.uint8_t *bool_keys = <np.uint8_t *> pointers[5]
-        cdef np.uint8_t *bool_coll = <np.uint8_t *> pointers[6]
-        cdef SparseUnorderedRefinedBitmaskSet list_coll = SparseUnorderedRefinedBitmaskSet()
-        for i in range(nele1):
-            bool_keys[i] = 0
-        while iter_set1[0] != iter_end1[0]:
-            mi1 = dereference(iter_set1[0])
-            if (coarse_ghosts == 1) or (ewah_refn[0].get(mi1) == 0):
-                # Coarse neighbors
-                ntot = morton_neighbors_coarse(mi1, max_index1, periodicity, ngz,
-                                               index, ind1_n, neighbor_list1)
-                for i in range(ntot):
-                    mi1_n = neighbor_list1[i]
-                    if ewah_keys[0].get(mi1_n) == 0:
-                        bool_keys[mi1_n] = 1
-            else:
-                for i in range(nele2):
-                    bool_coll[i] = 0
-                # Refined neighbors
-                iter_set2 = new ewah_bool_iterator(ewah_coll[0][mi1].begin())
-                iter_end2 = new ewah_bool_iterator(ewah_coll[0][mi1].end())
-                while iter_set2[0] != iter_end2[0]:
-                    mi2 = dereference(iter_set2[0])
-                    ntot = morton_neighbors_refined(mi1, mi2,
-                                                    max_index1, max_index2,
-                                                    periodicity, ngz, index,
-                                                    ind1_n, ind2_n,
-                                                    neighbor_list1,
-                                                    neighbor_list2)
-                    for i in range(ntot):
-                        mi1_n = neighbor_list1[i]
-                        mi2_n = neighbor_list2[i]
-                        if mi1_n == mi1:
-                            if ewah_coll[0][mi1].get(mi2_n) == 0:
-                                bool_keys[mi1_n] = 1
-                                bool_coll[mi2_n] = 1
-                        else:
-                            if ewah_refn[0].get(mi1_n) == 1:
-                                if ewah_coll[0][mi1_n].get(mi2_n) == 0:
-                                    bool_keys[mi1_n] = 1
-                                    list_coll._set(mi1_n, mi2_n)
-                            else:
-                                if ewah_keys[0].get(mi1_n) == 0:
-                                    bool_keys[mi1_n] = 1
-                    preincrement(iter_set2[0])
-                # Add to running list
-                temp_bool._set_refined_array_ptr(mi1, bool_coll)
-            preincrement(iter_set1[0])
-        # Set keys
-        out_bool._set_coarse_array_ptr(bool_keys)
-        list_coll._fill_bool(out_bool)
-        out_bool._append(temp_bool)
-        out_bool._compress(out_ewah)
-        # Free things
-        for i in range(7):
-            free(pointers[i])
-
-    cdef bytes _dumps(self):
-        # TODO: write word size
-        cdef sstream ss
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewahmap *ewah_coll = <ewahmap *> self.ewah_coll
-        cdef ewahmap_it it_map
-        cdef np.uint64_t nrefn, mi1
-        cdef ewah_bool_array mi1_ewah
-        # Write mi1 ewah & refinement ewah
-        ewah_keys[0].write(ss,1)
-        ewah_refn[0].write(ss,1)
-        # Number of refined bool arrays
-        nrefn = <np.uint64_t>(ewah_refn[0].numberOfOnes())
-        ss.write(<const char *> &nrefn, sizeof(nrefn))
-        # Loop over refined bool arrays
-        it_map = ewah_coll[0].begin()
-        while it_map != ewah_coll[0].end():
-            mi1 = dereference(it_map).first
-            mi1_ewah = dereference(it_map).second
-            ss.write(<const char *> &mi1, sizeof(mi1))
-            mi1_ewah.write(ss,1)
-            preincrement(it_map)
-        # Return type cast python bytes string
-        return <bytes>ss.str()
-
-    def dumps(self):
-        return self._dumps()
-
-    cdef bint _loads(self, bytes s):
-        # TODO: write word size
-        cdef sstream ss
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewahmap *ewah_coll = <ewahmap *> self.ewah_coll
-        cdef np.uint64_t nrefn, mi1
-        nrefn = mi1 = 0
-        # Write string to string stream
-        if len(s) == 0: return 1
-        ss.write(s, len(s))
-        # Read keys and refinement arrays
-        if ss.eof(): return 1
-        ewah_keys[0].read(ss,1)
-        if ss.eof(): return 1
-        ewah_refn[0].read(ss,1)
-        # Read and check number of refined cells
-        if ss.eof(): return 1
-        ss.read(<char *> (&nrefn), sizeof(nrefn))
-        if nrefn != ewah_refn[0].numberOfOnes():
-            raise Exception("Error in read. File indicates {} refinements, but bool array has {}.".format(nrefn,ewah_refn[0].numberOfOnes()))
-        # Loop over refined cells
-        for _ in range(nrefn):
-            ss.read(<char *> (&mi1), sizeof(mi1))
-            if ss.eof():
-                # A brief note about why we do this!
-                # In previous versions of the EWAH code, which were more
-                # susceptible to issues with differences in sizes of size_t
-                # etc, the ewah_coll.read would use instance variables as
-                # destinations; these were initialized to zero.  In recent
-                # versions, it uses (uninitialized) temporary variables.  We
-                # were passing in streams that were already at EOF - so the
-                # uninitialized memory would not be written to, and it would
-                # retain the previous values, which would invariably be really
-                # really big!  So we do a check for EOF here to make sure we're
-                # not up to no good.
-                break
-            ewah_coll[0][mi1].read(ss,1)
-            # or...
-            #mi1_ewah.read(ss,1)
-            #ewah_coll[0][mi1].swap(mi1_ewah)
-        return 1
-
-    def loads(self, s):
-        return self._loads(s)
-
-    def save(self, fname):
-        cdef bytes serial_BAC
-        f = open(fname,'wb')
-        serial_BAC = self._dumps()
-        f.write(struct.pack('Q',len(serial_BAC)))
-        f.write(serial_BAC)
-        f.close()
-
-    def load(self, fname):
-        cdef np.uint64_t size_serial
-        cdef bint flag_read
-        f = open(fname,'rb')
-        size_serial, = struct.unpack('Q',f.read(struct.calcsize('Q')))
-        flag_read = self._loads(f.read(size_serial))
-        f.close()
-        return flag_read
-
-    cdef bint _check(self):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewah_bool_array tmp1, tmp2
-        cdef np.uint64_t nchk
-        cdef str msg
-        # Check that there are not any refn that are not keys
-        ewah_keys[0].logicalxor(ewah_refn[0], tmp1)
-        ewah_refn[0].logicaland(tmp1, tmp2)
-        nchk = tmp2.numberOfOnes()
-        if nchk > 0:
-            msg = "There are {} refined cells that are not set on coarse level.".format(nchk)
-            print(msg)
-            return 0
-            # raise Exception(msg)
-        return 1
-
-    def __dealloc__(self):
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> self.ewah_keys
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> self.ewah_refn
-        cdef ewah_bool_array *ewah_coar = <ewah_bool_array *> self.ewah_coar
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        del ewah_keys
-        del ewah_refn
-        del ewah_coar
-        del ewah_coll
-
-    def print_info(self, prefix=''):
-        print("{}{: 8d} coarse, {: 8d} refined, {: 8d} total".format(prefix,
-                                                                     self._count_coarse(),
-                                                                     self._count_refined(),
-                                                                     self._count_total()))
-
-cdef class BoolArrayCollectionUncompressed:
-
-    def __cinit__(self, np.uint64_t nele1, np.uint64_t nele2):
-        self.nele1 = <int>nele1
-        self.nele2 = <int>nele2
-        self.ewah_coll = new ewah_map()
-        cdef np.uint64_t i
-        self.ewah_keys = <bitarrtype *>malloc(sizeof(bitarrtype)*nele1)
-        self.ewah_refn = <bitarrtype *>malloc(sizeof(bitarrtype)*nele1)
-        for i in range(nele1):
-            self.ewah_keys[i] = 0
-            self.ewah_refn[i] = 0
-
-    def reset(self):
-        self.__dealloc__()
-        self.__init__(self.nele1,self.nele2)
-
-    cdef void _compress(self, BoolArrayCollection solf):
-        cdef np.uint64_t i
-        cdef ewah_bool_array *ewah_keys = <ewah_bool_array *> solf.ewah_keys
-        cdef ewah_bool_array *ewah_refn = <ewah_bool_array *> solf.ewah_refn
-        cdef bitarrtype *bool_keys = <bitarrtype *> self.ewah_keys
-        cdef bitarrtype *bool_refn = <bitarrtype *> self.ewah_refn
-        for i in range(self.nele1):
-            if bool_keys[i] == 1:
-                ewah_keys[0].set(i)
-            if bool_refn[i] == 1:
-                ewah_refn[0].set(i)
-        cdef ewah_map *ewah_coll1 = <ewah_map *> self.ewah_coll
-        cdef ewah_map *ewah_coll2 = <ewah_map *> solf.ewah_coll
-        ewah_coll2[0] = ewah_coll1[0]
-
-    cdef void _set(self, np.uint64_t i1, np.uint64_t i2 = FLAG):
-        cdef bitarrtype *ewah_keys = <bitarrtype *> self.ewah_keys
-        cdef bitarrtype *ewah_refn = <bitarrtype *> self.ewah_refn
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        ewah_keys[i1] = 1
-        # Note the 0 here, for dereferencing
-        if i2 != FLAG:
-            ewah_refn[i1] = 1
-            ewah_coll[0][i1].set(i2)
-
-    cdef void _set_coarse(self, np.uint64_t i1):
-        cdef bitarrtype *ewah_keys = <bitarrtype *> self.ewah_keys
-        ewah_keys[i1] = 1
-
-    cdef void _set_refined(self, np.uint64_t i1, np.uint64_t i2):
-        cdef bitarrtype *ewah_refn = <bitarrtype *> self.ewah_refn
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        # Note the 0 here, for dereferencing
-        ewah_refn[i1] = 1
-        ewah_coll[0][i1].set(i2)
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    cdef void _set_coarse_array(self, np.uint8_t[:] arr):
-        cdef bitarrtype *ewah_keys = <bitarrtype *> self.ewah_keys
-        cdef np.uint64_t i1
-        for i1 in range(arr.shape[0]):
-            if arr[i1] == 1:
-                ewah_keys[i1] = 1
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    cdef void _set_coarse_array_ptr(self, np.uint8_t *arr):
-        # TODO: memcpy?
-        cdef bitarrtype *ewah_keys = <bitarrtype *> self.ewah_keys
-        cdef np.uint64_t i1
-        for i1 in range(self.nele1):
-            if arr[i1] == 1:
-                ewah_keys[i1] = 1
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    cdef void _set_refined_array(self, np.uint64_t i1, np.uint8_t[:] arr):
-        cdef bitarrtype *ewah_refn = <bitarrtype *> self.ewah_refn
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        cdef np.uint64_t i2
-        for i2 in range(arr.shape[0]):
-            if arr[i2] == 1:
-                ewah_refn[i1] = 1
-                ewah_coll[0][i1].set(i2)
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    @cython.cdivision(True)
-    @cython.initializedcheck(False)
-    cdef void _set_refined_array_ptr(self, np.uint64_t i1, np.uint8_t *arr):
-        cdef bitarrtype *ewah_refn = <bitarrtype *> self.ewah_refn
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        cdef np.uint64_t i2
-        cdef ewah_bool_array *barr = &ewah_coll[0][i1]
-        for i2 in range(self.nele2):
-            if arr[i2] == 1:
-                ewah_refn[i1] = 1
-                barr.set(i2)
-
-    cdef void _set_map(self, np.uint64_t i1, np.uint64_t i2):
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        ewah_coll[0][i1].set(i2)
-
-    cdef void _set_refn(self, np.uint64_t i1):
-        cdef bitarrtype *ewah_refn = <bitarrtype *> self.ewah_refn
-        ewah_refn[i1] = 1
-
-    cdef bint _get(self, np.uint64_t i1, np.uint64_t i2 = FLAG):
-        cdef bitarrtype *ewah_keys = <bitarrtype *> self.ewah_keys
-        cdef bitarrtype *ewah_refn = <bitarrtype *> self.ewah_refn
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        # Note the 0 here, for dereferencing
-        if ewah_keys[i1] == 0: return 0
-        if (ewah_refn[i1] == 0) or (i2 == FLAG):
-            return 1
-        return ewah_coll[0][i1].get(i2)
-
-    cdef bint _get_coarse(self, np.uint64_t i1):
-        cdef bitarrtype *ewah_keys = <bitarrtype *> self.ewah_keys
-        return <bint>ewah_keys[i1]
-        # if (ewah_keys[i1] == 0): return 0
-        # return 1
-
-    cdef bint _isref(self, np.uint64_t i):
-        cdef bitarrtype *ewah_refn = <bitarrtype *> self.ewah_refn
-        return <bint>ewah_refn[i]
-
-    cdef np.uint64_t _count_total(self):
-        cdef bitarrtype *ewah_keys = <bitarrtype *> self.ewah_keys
-        cdef np.uint64_t i
-        cdef np.uint64_t out = 0
-        for i in range(self.nele1):
-            out += ewah_keys[i]
-        return out
-
-    cdef np.uint64_t _count_refined(self):
-        cdef bitarrtype *ewah_refn = <bitarrtype *> self.ewah_refn
-        cdef np.uint64_t i
-        cdef np.uint64_t out = 0
-        for i in range(self.nele1):
-            out += ewah_refn[i]
-        return out
-
-    cdef void _append(self, BoolArrayCollectionUncompressed solf):
-        cdef bitarrtype *ewah_keys1 = <bitarrtype *> self.ewah_keys
-        cdef bitarrtype *ewah_refn1 = <bitarrtype *> self.ewah_refn
-        cdef bitarrtype *ewah_keys2 = <bitarrtype *> solf.ewah_keys
-        cdef bitarrtype *ewah_refn2 = <bitarrtype *> solf.ewah_refn
-        cdef ewahmap *ewah_coll1 = <ewahmap *> self.ewah_coll
-        cdef ewahmap *ewah_coll2 = <ewahmap *> solf.ewah_coll
-        cdef ewahmap_it it_map1, it_map2
-        cdef ewah_bool_array swap, mi1_ewah1, mi1_ewah2
-        cdef np.uint64_t mi1
-        # TODO: Check if nele1 is equal?
-        # Keys
-        for mi1 in range(solf.nele1):
-            if ewah_keys2[mi1] == 1:
-                ewah_keys1[mi1] = 1
-        # Refined
-        for mi1 in range(solf.nele1):
-            if ewah_refn2[mi1] == 1:
-                ewah_refn1[mi1] = 1
-        # Map
-        it_map2 = ewah_coll2[0].begin()
-        while it_map2 != ewah_coll2[0].end():
-            mi1 = dereference(it_map2).first
-            mi1_ewah2 = dereference(it_map2).second
-            it_map1 = ewah_coll1[0].find(mi1)
-            if it_map1 == ewah_coll1[0].end():
-                ewah_coll1[0][mi1] = mi1_ewah2
-            else:
-                mi1_ewah1 = dereference(it_map1).second
-                mi1_ewah1.logicalor(mi1_ewah2, swap)
-                mi1_ewah1.swap(swap)
-            preincrement(it_map2)
-
-    cdef bint _intersects(self, BoolArrayCollectionUncompressed solf):
-        cdef bitarrtype *ewah_keys1 = <bitarrtype *> self.ewah_keys
-        cdef bitarrtype *ewah_refn1 = <bitarrtype *> self.ewah_refn
-        cdef bitarrtype *ewah_keys2 = <bitarrtype *> solf.ewah_keys
-        cdef bitarrtype *ewah_refn2 = <bitarrtype *> solf.ewah_refn
-        cdef ewahmap *ewah_coll1 = <ewahmap *> self.ewah_coll
-        cdef ewahmap *ewah_coll2 = <ewahmap *> solf.ewah_coll
-        cdef ewahmap_it it_map1, it_map2
-        cdef ewah_bool_array mi1_ewah1, mi1_ewah2
-        cdef np.uint64_t mi1
-        # No intersection
-        for mi1 in range(self.nele1):
-            if (ewah_keys1[mi1] == 1) and (ewah_keys2[mi1] == 1):
-                break
-        if (mi1 < self.nele1):
-            return 0
-        mi1 = self.nele1 # This is to get rid of a warning
-        # Intersection at refined level
-        for mi1 in range(self.nele1):
-            if (ewah_refn1[mi1] == 1) and (ewah_refn2[mi1] == 1):
-                it_map1 = ewah_coll1[0].begin()
-                while (it_map1 != ewah_coll1[0].end()):
-                    mi1 = dereference(it_map1).first
-                    it_map2 = ewah_coll2[0].find(mi1)
-                    if it_map2 != ewah_coll2[0].end():
-                        mi1_ewah1 = dereference(it_map1).second
-                        mi1_ewah2 = dereference(it_map2).second
-                        if mi1_ewah1.intersects(mi1_ewah2):
-                            return 1
-                    preincrement(it_map1)
-                break
-        # Intersection at coarse level or refined inside coarse
-        if mi1 == self.nele1:
-            return 1
-        return 0
-
-    def __dealloc__(self):
-        cdef bitarrtype *ewah_keys = <bitarrtype *> self.ewah_keys
-        cdef bitarrtype *ewah_refn = <bitarrtype *> self.ewah_refn
-        free(ewah_keys)
-        free(ewah_refn)
-        cdef ewah_map *ewah_coll = <ewah_map *> self.ewah_coll
-        del ewah_coll
-
-    def print_info(self, prefix=''):
-        cdef np.uint64_t nrefn = self._count_refined()
-        cdef np.uint64_t nkeys = self._count_total()
-        print("{}{: 8d} coarse, {: 8d} refined, {: 8d} total".format(prefix,
-                                                                     nkeys - nrefn,
-                                                                     nrefn,
-                                                                     nkeys))
-
-
-
-# Vector version
-cdef class SparseUnorderedBitmaskVector:
-    def __cinit__(self):
-        self.total = 0
-
-    cdef void _set(self, np.uint64_t ind):
-        self.entries.push_back(ind)
-        self.total += 1
-
-    def set(self, ind):
-        self._set(ind)
-
-    cdef void _fill(self, np.uint8_t[:] mask):
-        cdef np.uint64_t i, ind
-        for i in range(self.entries.size()):
-            ind = self.entries[i]
-            mask[ind] = 1
-
-    cdef void _fill_ewah(self, BoolArrayCollection mm):
-        self._remove_duplicates()
-        cdef np.uint64_t i, ind
-        for i in range(self.entries.size()):
-            ind = self.entries[i]
-            mm._set_coarse(ind)
-
-    cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm):
-        self._remove_duplicates()
-        cdef np.uint64_t i, ind
-        for i in range(self.entries.size()):
-            ind = self.entries[i]
-            mm._set_coarse(ind)
-
-    cdef void _reset(self):
-        self.entries.erase(self.entries.begin(), self.entries.end())
-        self.total = 0
-
-    cdef to_array(self):
-        self._remove_duplicates()
-        cdef np.ndarray[np.uint64_t, ndim=1] rv
-        rv = np.empty(self.entries.size(), dtype='uint64')
-        for i in range(self.entries.size()):
-            rv[i] = self.entries[i]
-        return rv
-
-    cdef void _remove_duplicates(self):
-        cdef vector[np.uint64_t].iterator last
-        sort(self.entries.begin(), self.entries.end())
-        last = unique(self.entries.begin(), self.entries.end())
-        self.entries.erase(last, self.entries.end())
-
-    cdef void _prune(self):
-        if self.total > MAX_VECTOR_SIZE:
-            self._remove_duplicates()
-            self.total = 0
-
-    def __dealloc__(self):
-        self.entries.clear()
-
-# Set version
-cdef class SparseUnorderedBitmaskSet:
-    cdef void _set(self, np.uint64_t ind):
-        self.entries.insert(ind)
-
-    def set(self, ind):
-        self._set(ind)
-
-    cdef void _fill(self, np.uint8_t[:] mask):
-        for it in self.entries:
-            mask[it] = 1
-
-    cdef void _fill_ewah(self, BoolArrayCollection mm):
-        for it in self.entries:
-            mm._set_coarse(it)
-
-    cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm):
-        for it in self.entries:
-            mm._set_coarse(it)
-
-    cdef void _reset(self):
-        self.entries.clear()
-
-    cdef to_array(self):
-        cdef np.uint64_t ind
-        cdef np.ndarray[np.uint64_t, ndim=1] rv
-        cdef cset[np.uint64_t].iterator it
-        rv = np.empty(self.entries.size(), dtype='uint64')
-        it = self.entries.begin()
-        i = 0
-        while it != self.entries.end():
-            ind = dereference(it)
-            rv[i] = ind
-            preincrement(it)
-            i += 1
-        return rv
-
-    def __dealloc__(self):
-        self.entries.clear()
-
-# vector version
-cdef class SparseUnorderedRefinedBitmaskVector:
-    def __cinit__(self):
-        self.total = 0
-
-    cdef void _set(self, np.uint64_t ind1, np.uint64_t ind2):
-        cdef ind_pair ind
-        ind.first = ind1
-        ind.second = ind2
-        self.entries.push_back(ind)
-        self.total += 1
-
-    def set(self, ind1, ind2):
-        self._set(ind1, ind2)
-
-    cdef void _fill(self, np.uint8_t[:] mask1, np.uint8_t[:] mask2):
-        for it in self.entries:
-            mask1[it.first] = mask2[it.second] = 1
-
-    cdef void _fill_ewah(self, BoolArrayCollection mm):
-        self._remove_duplicates()
-        for it in self.entries:
-            mm._set_refined(it.first, it.second)
-
-    cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm):
-        self._remove_duplicates()
-        for it in self.entries:
-            mm._set_refined(it.first, it.second)
-
-    cdef void _reset(self):
-        self.entries.erase(self.entries.begin(), self.entries.end())
-        self.total = 0
-
-    cdef to_array(self):
-        cdef np.uint64_t i
-        cdef np.ndarray[np.uint64_t, ndim=2] rv
-        self._remove_duplicates()
-        rv = np.empty((self.entries.size(),2),dtype='uint64')
-        i = 0
-        for it in self.entries:
-            rv[i,0] = it.first
-            rv[i,1] = it.second
-            i += 1
-        return rv
-
-    cdef void _remove_duplicates(self):
-        cdef vector[ind_pair].iterator last
-        sort(self.entries.begin(), self.entries.end())
-        last = unique(self.entries.begin(), self.entries.end())
-        self.entries.erase(last, self.entries.end())
-        # http://stackoverflow.com/questions/16970982/find-unique-rows-in-numpy-array
-        # cdef np.ndarray[np.uint64_t, ndim=2] rv
-        # cdef np.ndarray[np.uint64_t, ndim=2] rv_uni
-        # cdef np.uint64_t m
-        # cdef vector[np.uint64_t].iterator last1
-        # cdef vector[np.uint64_t].iterator last2
-        # # cdef np.ndarray[np.uint64_t, ndim=1] _
-        # cdef vector[np.uint64_t] *entries1 = <vector[np.uint64_t]*> self.entries1
-        # cdef vector[np.uint64_t] *entries2 = <vector[np.uint64_t]*> self.entries2
-        # rv = np.empty((entries1[0].size(),2),dtype='uint64')
-        # for i in range(entries1[0].size()):
-        #     rv[i,0] = entries1[0][i]
-        #     rv[i,1] = entries2[0][i]
-        # rv_uni = np.unique(np.ascontiguousarray(rv).view(np.dtype((np.void, rv.dtype.itemsize * rv.shape[1])))).view(rv.dtype).reshape(-1,rv.shape[1])
-        # last1 = entries1[0].begin() + rv_uni.shape[0]
-        # last2 = entries2[0].begin() + rv_uni.shape[0]
-        # for m in range(rv_uni.shape[0]):
-        #     entries1[0][m] = rv_uni[m,0]
-        #     entries2[0][m] = rv_uni[m,1]
-        # entries1[0].erase(last1, entries1[0].end())
-        # entries2[0].erase(last2, entries2[0].end())
-
-    cdef void _prune(self):
-        if self.total > MAX_VECTOR_SIZE:
-            self._remove_duplicates()
-            self.total = 0
-
-    def __dealloc__(self):
-        self.entries.clear()
-
-# Set version
-cdef class SparseUnorderedRefinedBitmaskSet:
-    cdef void _set(self, np.uint64_t ind1, np.uint64_t ind2):
-        cdef ind_pair ind
-        ind.first = ind1
-        ind.second = ind2
-        self.entries.insert(ind)
-
-    def set(self, ind1, ind2):
-        self._set(ind1, ind2)
-
-    cdef void _fill(self, np.uint8_t[:] mask1, np.uint8_t[:] mask2):
-        for p in self.entries:
-            mask1[p.first] = mask2[p.second] = 1
-
-    cdef void _fill_ewah(self, BoolArrayCollection mm):
-        for it in self.entries:
-            mm._set_refined(it.first, it.second)
-
-    cdef void _fill_bool(self, BoolArrayCollectionUncompressed mm):
-        for it in self.entries:
-            mm._set_refined(it.first, it.second)
-
-    cdef void _reset(self):
-        self.entries.clear()
-
-    cdef to_array(self):
-        cdef np.uint64_t i
-        cdef np.ndarray[np.uint64_t, ndim=2] rv
-        rv = np.empty((self.entries.size(),2),dtype='uint64')
-        i = 0
-        for it in self.entries:
-            rv[i,0] = it.first
-            rv[i,1] = it.second
-            i += 1
-        return rv
-
-    def __dealloc__(self):
-        self.entries.clear()
diff --git a/yt/utilities/lib/ewahboolarray/LICENSE b/yt/utilities/lib/ewahboolarray/LICENSE
deleted file mode 100644
index 37ec93a14fd..00000000000
--- a/yt/utilities/lib/ewahboolarray/LICENSE
+++ /dev/null
@@ -1,191 +0,0 @@
-Apache License
-Version 2.0, January 2004
-http://www.apache.org/licenses/
-
-TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-1. Definitions.
-
-"License" shall mean the terms and conditions for use, reproduction, and
-distribution as defined by Sections 1 through 9 of this document.
-
-"Licensor" shall mean the copyright owner or entity authorized by the copyright
-owner that is granting the License.
-
-"Legal Entity" shall mean the union of the acting entity and all other entities
-that control, are controlled by, or are under common control with that entity.
-For the purposes of this definition, "control" means (i) the power, direct or
-indirect, to cause the direction or management of such entity, whether by
-contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
-outstanding shares, or (iii) beneficial ownership of such entity.
-
-"You" (or "Your") shall mean an individual or Legal Entity exercising
-permissions granted by this License.
-
-"Source" form shall mean the preferred form for making modifications, including
-but not limited to software source code, documentation source, and configuration
-files.
-
-"Object" form shall mean any form resulting from mechanical transformation or
-translation of a Source form, including but not limited to compiled object code,
-generated documentation, and conversions to other media types.
-
-"Work" shall mean the work of authorship, whether in Source or Object form, made
-available under the License, as indicated by a copyright notice that is included
-in or attached to the work (an example is provided in the Appendix below).
-
-"Derivative Works" shall mean any work, whether in Source or Object form, that
-is based on (or derived from) the Work and for which the editorial revisions,
-annotations, elaborations, or other modifications represent, as a whole, an
-original work of authorship. For the purposes of this License, Derivative Works
-shall not include works that remain separable from, or merely link (or bind by
-name) to the interfaces of, the Work and Derivative Works thereof.
-
-"Contribution" shall mean any work of authorship, including the original version
-of the Work and any modifications or additions to that Work or Derivative Works
-thereof, that is intentionally submitted to Licensor for inclusion in the Work
-by the copyright owner or by an individual or Legal Entity authorized to submit
-on behalf of the copyright owner. For the purposes of this definition,
-"submitted" means any form of electronic, verbal, or written communication sent
-to the Licensor or its representatives, including but not limited to
-communication on electronic mailing lists, source code control systems, and
-issue tracking systems that are managed by, or on behalf of, the Licensor for
-the purpose of discussing and improving the Work, but excluding communication
-that is conspicuously marked or otherwise designated in writing by the copyright
-owner as "Not a Contribution."
-
-"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
-of whom a Contribution has been received by Licensor and subsequently
-incorporated within the Work.
-
-2. Grant of Copyright License.
-
-Subject to the terms and conditions of this License, each Contributor hereby
-grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
-irrevocable copyright license to reproduce, prepare Derivative Works of,
-publicly display, publicly perform, sublicense, and distribute the Work and such
-Derivative Works in Source or Object form.
-
-3. Grant of Patent License.
-
-Subject to the terms and conditions of this License, each Contributor hereby
-grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
-irrevocable (except as stated in this section) patent license to make, have
-made, use, offer to sell, sell, import, and otherwise transfer the Work, where
-such license applies only to those patent claims licensable by such Contributor
-that are necessarily infringed by their Contribution(s) alone or by combination
-of their Contribution(s) with the Work to which such Contribution(s) was
-submitted. If You institute patent litigation against any entity (including a
-cross-claim or counterclaim in a lawsuit) alleging that the Work or a
-Contribution incorporated within the Work constitutes direct or contributory
-patent infringement, then any patent licenses granted to You under this License
-for that Work shall terminate as of the date such litigation is filed.
-
-4. Redistribution.
-
-You may reproduce and distribute copies of the Work or Derivative Works thereof
-in any medium, with or without modifications, and in Source or Object form,
-provided that You meet the following conditions:
-
-You must give any other recipients of the Work or Derivative Works a copy of
-this License; and
-You must cause any modified files to carry prominent notices stating that You
-changed the files; and
-You must retain, in the Source form of any Derivative Works that You distribute,
-all copyright, patent, trademark, and attribution notices from the Source form
-of the Work, excluding those notices that do not pertain to any part of the
-Derivative Works; and
-If the Work includes a "NOTICE" text file as part of its distribution, then any
-Derivative Works that You distribute must include a readable copy of the
-attribution notices contained within such NOTICE file, excluding those notices
-that do not pertain to any part of the Derivative Works, in at least one of the
-following places: within a NOTICE text file distributed as part of the
-Derivative Works; within the Source form or documentation, if provided along
-with the Derivative Works; or, within a display generated by the Derivative
-Works, if and wherever such third-party notices normally appear. The contents of
-the NOTICE file are for informational purposes only and do not modify the
-License. You may add Your own attribution notices within Derivative Works that
-You distribute, alongside or as an addendum to the NOTICE text from the Work,
-provided that such additional attribution notices cannot be construed as
-modifying the License.
-You may add Your own copyright statement to Your modifications and may provide
-additional or different license terms and conditions for use, reproduction, or
-distribution of Your modifications, or for any such Derivative Works as a whole,
-provided Your use, reproduction, and distribution of the Work otherwise complies
-with the conditions stated in this License.
-
-5. Submission of Contributions.
-
-Unless You explicitly state otherwise, any Contribution intentionally submitted
-for inclusion in the Work by You to the Licensor shall be under the terms and
-conditions of this License, without any additional terms or conditions.
-Notwithstanding the above, nothing herein shall supersede or modify the terms of
-any separate license agreement you may have executed with Licensor regarding
-such Contributions.
-
-6. Trademarks.
-
-This License does not grant permission to use the trade names, trademarks,
-service marks, or product names of the Licensor, except as required for
-reasonable and customary use in describing the origin of the Work and
-reproducing the content of the NOTICE file.
-
-7. Disclaimer of Warranty.
-
-Unless required by applicable law or agreed to in writing, Licensor provides the
-Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
-including, without limitation, any warranties or conditions of TITLE,
-NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
-solely responsible for determining the appropriateness of using or
-redistributing the Work and assume any risks associated with Your exercise of
-permissions under this License.
-
-8. Limitation of Liability.
-
-In no event and under no legal theory, whether in tort (including negligence),
-contract, or otherwise, unless required by applicable law (such as deliberate
-and grossly negligent acts) or agreed to in writing, shall any Contributor be
-liable to You for damages, including any direct, indirect, special, incidental,
-or consequential damages of any character arising as a result of this License or
-out of the use or inability to use the Work (including but not limited to
-damages for loss of goodwill, work stoppage, computer failure or malfunction, or
-any and all other commercial damages or losses), even if such Contributor has
-been advised of the possibility of such damages.
-
-9. Accepting Warranty or Additional Liability.
-
-While redistributing the Work or Derivative Works thereof, You may choose to
-offer, and charge a fee for, acceptance of support, warranty, indemnity, or
-other liability obligations and/or rights consistent with this License. However,
-in accepting such obligations, You may act only on Your own behalf and on Your
-sole responsibility, not on behalf of any other Contributor, and only if You
-agree to indemnify, defend, and hold each Contributor harmless for any liability
-incurred by, or claims asserted against, such Contributor by reason of your
-accepting any such warranty or additional liability.
-
-END OF TERMS AND CONDITIONS
-
-APPENDIX: How to apply the Apache License to your work
-
-To apply the Apache License to your work, attach the following boilerplate
-notice, with the fields enclosed by brackets "[]" replaced with your own
-identifying information. (Don't include the brackets!) The text should be
-enclosed in the appropriate comment syntax for the file format. We also
-recommend that a file or class name and description of purpose be included on
-the same "printed page" as the copyright notice for easier identification within
-third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/yt/utilities/lib/ewahboolarray/README b/yt/utilities/lib/ewahboolarray/README
deleted file mode 100644
index b86d316c9ad..00000000000
--- a/yt/utilities/lib/ewahboolarray/README
+++ /dev/null
@@ -1,8 +0,0 @@
-This code was bundled from the EWAHBoolArray project by Daniel Lemire,
-available at:
-
-https://github.com/lemire/EWAHBoolArray
-
-Currently this is at revision 88b25a3345b82353ccd97a7de6064e6c179a7cc2
-
-This code is available under the Apache2.0 license.
diff --git a/yt/utilities/lib/ewahboolarray/boolarray.h b/yt/utilities/lib/ewahboolarray/boolarray.h
deleted file mode 100644
index 0a2f60aa2e9..00000000000
--- a/yt/utilities/lib/ewahboolarray/boolarray.h
+++ /dev/null
@@ -1,488 +0,0 @@
-/**
- * This code is released under the
- * Apache License Version 2.0 http://www.apache.org/licenses/.
- *
- * (c) Daniel Lemire, http://lemire.me/en/
- */
-
-#ifndef BOOLARRAY_H
-#define BOOLARRAY_H
-#include <cassert>
-#include <iostream>
-#include <iso646.h> // mostly for Microsoft compilers
-#include <sstream>
-#include <stdarg.h>
-#include <stdexcept>
-#include <vector>
-
-#include "ewahutil.h"
-
-namespace ewah {
-/**
- * A dynamic bitset implementation. (without compression).
- */
-template <class uword = uint32_t> class BoolArray {
-public:
-  BoolArray(const size_t n, const uword initval = 0)
-      : buffer(n / wordinbits + (n % wordinbits == 0 ? 0 : 1), initval),
-        sizeinbits(n) {}
-
-  BoolArray() : buffer(), sizeinbits(0) {}
-
-  BoolArray(const BoolArray &ba)
-      : buffer(ba.buffer), sizeinbits(ba.sizeinbits) {}
-  static BoolArray bitmapOf(size_t n, ...) {
-    BoolArray ans;
-    va_list vl;
-    va_start(vl, n);
-    for (size_t i = 0; i < n; i++) {
-      ans.set(static_cast<size_t>(va_arg(vl, int)));
-    }
-    va_end(vl);
-    return ans;
-  }
-  size_t sizeInBytes() const { return buffer.size() * sizeof(uword); }
-
-  void read(std::istream &in) {
-    sizeinbits = 0;
-    in.read(reinterpret_cast<char *>(&sizeinbits), sizeof(sizeinbits));
-    buffer.resize(sizeinbits / wordinbits +
-                  (sizeinbits % wordinbits == 0 ? 0 : 1));
-    if (buffer.size() == 0)
-      return;
-    in.read(reinterpret_cast<char *>(&buffer[0]),
-            static_cast<std::streamsize>(buffer.size() * sizeof(uword)));
-  }
-
-  void readBuffer(std::istream &in, const size_t size) {
-    buffer.resize(size);
-    sizeinbits = size * sizeof(uword) * 8;
-    if (buffer.empty())
-      return;
-    in.read(reinterpret_cast<char *>(&buffer[0]),
-            buffer.size() * sizeof(uword));
-  }
-
-  void setSizeInBits(const size_t sizeib) { sizeinbits = sizeib; }
-
-  void write(std::ostream &out) { write(out, sizeinbits); }
-
-  void write(std::ostream &out, const size_t numberofbits) const {
-    const size_t size =
-        numberofbits / wordinbits + (numberofbits % wordinbits == 0 ? 0 : 1);
-    out.write(reinterpret_cast<const char *>(&numberofbits),
-              sizeof(numberofbits));
-    if (numberofbits == 0)
-      return;
-    out.write(reinterpret_cast<const char *>(&buffer[0]),
-              static_cast<std::streamsize>(size * sizeof(uword)));
-  }
-
-  void writeBuffer(std::ostream &out, const size_t numberofbits) const {
-    const size_t size =
-        numberofbits / wordinbits + (numberofbits % wordinbits == 0 ? 0 : 1);
-    if (size == 0)
-      return;
-#ifdef EWAHASSERT
-    assert(buffer.size() >= size);
-#endif
-    out.write(reinterpret_cast<const char *>(&buffer[0]), size * sizeof(uword));
-  }
-
-  size_t sizeOnDisk() const {
-    size_t size =
-        sizeinbits / wordinbits + (sizeinbits % wordinbits == 0 ? 0 : 1);
-    return sizeof(sizeinbits) + size * sizeof(uword);
-  }
-
-  BoolArray &operator=(const BoolArray &x) {
-    this->buffer = x.buffer;
-    this->sizeinbits = x.sizeinbits;
-    return *this;
-  }
-
-  bool operator==(const BoolArray &x) const {
-    if (sizeinbits != x.sizeinbits)
-      return false;
-    for (size_t k = 0; k < buffer.size(); ++k)
-      if (buffer[k] != x.buffer[k])
-        return false;
-    return true;
-  }
-
-  bool operator!=(const BoolArray &x) const { return !operator==(x); }
-
-  void setWord(const size_t pos, const uword val) {
-#ifdef EWAHASSERT
-    assert(pos < buffer.size());
-#endif
-    buffer[pos] = val;
-  }
-
-  void addWord(const uword val) {
-    if (sizeinbits % wordinbits != 0)
-      throw std::invalid_argument("you probably didn't want to do this");
-    sizeinbits += wordinbits;
-    buffer.push_back(val);
-  }
-
-  uword getWord(const size_t pos) const {
-#ifdef EWAHASSERT
-    assert(pos < buffer.size());
-#endif
-    return buffer[pos];
-  }
-
-  /**
-   * set to true (whether it was already set to true or not)
-   */
-  void set(const size_t pos) {
-    if (pos >= sizeinbits)
-      padWithZeroes(pos + 1);
-    buffer[pos / wordinbits] |= static_cast<uword>((static_cast<uword>(1) << (pos % wordinbits)));
-  }
-
-  /**
-   * set to false (whether it was already set to false or not)
-   *
-   */
-  void unset(const size_t pos) {
-    if (pos < sizeinbits)
-      buffer[pos / wordinbits] &=
-          ~(static_cast<uword>(1) << (pos % wordinbits));
-  }
-
-  /**
-   * true of false? (set or unset)
-   */
-  bool get(const size_t pos) const {
-#ifdef EWAHASSERT
-    assert(pos / wordinbits < buffer.size());
-#endif
-    return (buffer[pos / wordinbits] &
-            (static_cast<uword>(1) << (pos % wordinbits))) != 0;
-  }
-
-  /**
-   * set all bits to 0
-   */
-  void reset() {
-    if (buffer.size() > 0)
-      memset(&buffer[0], 0, sizeof(uword) * buffer.size());
-    sizeinbits = 0;
-  }
-
-  size_t sizeInBits() const { return sizeinbits; }
-
-  ~BoolArray() {}
-
-  /**
-   * Computes the logical and and writes to the provided BoolArray (out).
-   * The current bitmaps is unchanged.
-   */
-  void logicaland(const BoolArray &ba, BoolArray &out) const {
-    if (ba.buffer.size() < buffer.size())
-      out.setToSize(ba);
-    else
-      out.setToSize(*this);
-    for (size_t i = 0; i < out.buffer.size(); ++i)
-      out.buffer[i] = buffer[i] & ba.buffer[i];
-  }
-
-  /**
-   * Computes the logical and and return the result.
-   * The current bitmaps is unchanged.
-   */
-  BoolArray logicaland(const BoolArray &a) const {
-    BoolArray answer;
-    logicaland(a, answer);
-    return answer;
-  }
-
-  void inplace_logicaland(const BoolArray &ba) {
-    if (ba.buffer.size() < buffer.size())
-      setToSize(ba);
-    for (size_t i = 0; i < buffer.size(); ++i)
-      buffer[i] = buffer[i] & ba.buffer[i];
-  }
-
-  /**
-   * Computes the logical andnot and writes to the provided BoolArray (out).
-   * The current bitmaps is unchanged.
-   */
-  void logicalandnot(const BoolArray &ba, BoolArray &out) const {
-    out.setToSize(*this);
-    size_t upto = out.buffer.size() < ba.buffer.size() ? out.buffer.size()
-                                                       : ba.buffer.size();
-    for (size_t i = 0; i < upto; ++i)
-      out.buffer[i] = static_cast<uword>(buffer[i] & (~ba.buffer[i]));
-    for (size_t i = upto; i < out.buffer.size(); ++i)
-      out.buffer[i] = buffer[i];
-    out.clearBogusBits();
-  }
-
-  /**
-   * Computes the logical andnot and return the result.
-   * The current bitmaps is unchanged.
-   */
-  BoolArray logicalandnot(const BoolArray &a) const {
-    BoolArray answer;
-    logicalandnot(a, answer);
-    return answer;
-  }
-
-  void inplace_logicalandnot(const BoolArray &ba) {
-    size_t upto =
-        buffer.size() < ba.buffer.size() ? buffer.size() : ba.buffer.size();
-    for (size_t i = 0; i < upto; ++i)
-      buffer[i] = buffer[i] & (~ba.buffer[i]);
-    clearBogusBits();
-  }
-
-  /**
-   * Computes the logical or and writes to the provided BoolArray (out).
-   * The current bitmaps is unchanged.
-   */
-  void logicalor(const BoolArray &ba, BoolArray &out) const {
-    const BoolArray *smallest;
-    const BoolArray *largest;
-    if (ba.buffer.size() > buffer.size()) {
-      smallest = this;
-      largest = &ba;
-      out.setToSize(ba);
-    } else {
-      smallest = &ba;
-      largest = this;
-      out.setToSize(*this);
-    }
-    for (size_t i = 0; i < smallest->buffer.size(); ++i)
-      out.buffer[i] = buffer[i] | ba.buffer[i];
-    for (size_t i = smallest->buffer.size(); i < largest->buffer.size(); ++i)
-      out.buffer[i] = largest->buffer[i];
-  }
-
-  /**
-   * Computes the logical or and return the result.
-   * The current bitmaps is unchanged.
-   */
-  BoolArray logicalor(const BoolArray &a) const {
-    BoolArray answer;
-    logicalor(a, answer);
-    return answer;
-  }
-
-  void inplace_logicalor(const BoolArray &ba) { logicalor(ba, *this); }
-
-  /**
-   * Computes the logical xor and writes to the provided BoolArray (out).
-   * The current bitmaps is unchanged.
-   */
-  void logicalxor(const BoolArray &ba, BoolArray &out) const {
-    const BoolArray *smallest;
-    const BoolArray *largest;
-    if (ba.buffer.size() > buffer.size()) {
-      smallest = this;
-      largest = &ba;
-      out.setToSize(ba);
-    } else {
-      smallest = &ba;
-      largest = this;
-      out.setToSize(*this);
-    }
-    for (size_t i = 0; i < smallest->buffer.size(); ++i)
-      out.buffer[i] = buffer[i] ^ ba.buffer[i];
-    for (size_t i = smallest->buffer.size(); i < largest->buffer.size(); ++i)
-      out.buffer[i] = largest->buffer[i];
-  }
-
-  /**
-   * Computes the logical xor and return the result.
-   * The current bitmaps is unchanged.
-   */
-  BoolArray logicalxor(const BoolArray &a) const {
-    BoolArray answer;
-    logicalxor(a, answer);
-    return answer;
-  }
-
-  void inplace_logicalxor(const BoolArray &ba) { logicalxor(ba, *this); }
-
-  /**
-   * Computes the logical not and writes to the provided BoolArray (out).
-   * The current bitmaps is unchanged.
-   */
-  void logicalnot(BoolArray &out) const {
-    out.setToSize(*this);
-    for (size_t i = 0; i < buffer.size(); ++i)
-      out.buffer[i] = ~buffer[i];
-    out.clearBogusBits();
-  }
-
-  /**
-   * Computes the logical not and return the result.
-   * The current bitmaps is unchanged.
-   */
-  BoolArray logicalandnot() const {
-    BoolArray answer;
-    logicalnot(answer);
-    return answer;
-  }
-
-  void inplace_logicalnot() {
-    for (size_t i = 0; i < buffer.size(); ++i)
-      buffer[i] = ~buffer[i];
-    clearBogusBits();
-  }
-
-  /**
-   * Returns the number of bits set to the value 1.
-   * The running time complexity is proportional to the
-   *  size of the bitmap.
-   *
-   * This is sometimes called the cardinality.
-   */
-  size_t numberOfOnes() const {
-    size_t count = 0;
-    for (size_t i = 0; i < buffer.size(); ++i) {
-      count += countOnes((UWORD)buffer[i]);
-    }
-    return count;
-  }
-
-  inline void printout(std::ostream &o = std::cout) {
-    for (size_t k = 0; k < sizeinbits; ++k)
-      o << get(k) << " ";
-    o << std::endl;
-  }
-
-  /**
-   * Make sure the two bitmaps have the same size (padding with zeroes
-   * if necessary). It has constant running time complexity.
-   */
-  void makeSameSize(BoolArray &a) {
-    if (a.sizeinbits < sizeinbits)
-      a.padWithZeroes(sizeinbits);
-    else if (sizeinbits < a.sizeinbits)
-      padWithZeroes(a.sizeinbits);
-  }
-  /**
-   * Make sure the current bitmap has the size of the provided bitmap.
-   */
-  void setToSize(const BoolArray &a) {
-    sizeinbits = a.sizeinbits;
-    buffer.resize(a.buffer.size());
-  }
-
-  /**
-   * make sure the size of the array is totalbits bits by padding with zeroes.
-   * returns the number of words added (storage cost increase)
-   */
-  size_t padWithZeroes(const size_t totalbits) {
-    size_t currentwordsize = (sizeinbits + wordinbits - 1) / wordinbits;
-    size_t neededwordsize = (totalbits + wordinbits - 1) / wordinbits;
-#ifdef EWAHASSERT
-    assert(neededwordsize >= currentwordsize);
-#endif
-    buffer.resize(neededwordsize);
-    sizeinbits = totalbits;
-    return static_cast<size_t>(neededwordsize - currentwordsize);
-  }
-
-  void append(const BoolArray &a);
-
-  enum { wordinbits = sizeof(uword) * 8 };
-
-  std::vector<size_t> toArray() const {
-    std::vector<size_t> ans;
-    for (size_t k = 0; k < buffer.size(); ++k) {
-      uword myword = buffer[k];
-      while (myword != 0) {
-        uint32_t ntz = numberOfTrailingZeros(myword);
-        ans.push_back(sizeof(uword) * 8 * k + ntz);
-        myword ^= (static_cast<uword>(1) << ntz);
-      }
-    }
-    return ans;
-  }
-
-  /**
-   * Transform into a string that presents a list of set bits.
-   * The running time is linear in the size of the bitmap.
-   */
-  operator std::string() const {
-    std::stringstream ss;
-    ss << *this;
-    return ss.str();
-  }
-
-  friend std::ostream &operator<<(std::ostream &out, const BoolArray &a) {
-    std::vector<size_t> v = a.toArray();
-    out << "{";
-    for (std::vector<size_t>::const_iterator i = v.begin(); i != v.end();) {
-      out << *i;
-      ++i;
-      if (i != v.end())
-        out << ",";
-    }
-    out << "}";
-    return out;
-
-    return (out << static_cast<std::string>(a));
-  }
-
-private:
-  void clearBogusBits() {
-    if ((sizeinbits % wordinbits) != 0) {
-      const uword maskbogus =
-          static_cast<uword>((static_cast<uword>(1) << (sizeinbits % wordinbits)) - 1);
-      buffer[buffer.size() - 1] &= maskbogus;
-    }
-  }
-
-  std::vector<uword> buffer;
-  size_t sizeinbits;
-};
-
-/**
- * computes the logical or (union) between "n" bitmaps (referenced by a
- * pointer).
- * The answer gets written out in container. This might be faster than calling
- * logicalor n-1 times.
- */
-template <class uword>
-void fast_logicalor_tocontainer(size_t n, const BoolArray<uword> **inputs,
-                                BoolArray<uword> &container) {
-  if (n == 0) {
-    container.reset();
-    return;
-  }
-  container = *inputs[0];
-  for (size_t i = 0; i < n; i++) {
-    container.inplace_logicalor(*inputs[i]);
-  }
-}
-
-/**
- * computes the logical or (union) between "n" bitmaps (referenced by a
- * pointer).
- * Returns the answer. This might be faster than calling
- * logicalor n-1 times.
- */
-template <class uword>
-BoolArray<uword> fast_logicalor(size_t n, const BoolArray<uword> **inputs) {
-  BoolArray<uword> answer;
-  fast_logicalor_tocontainer(n, inputs, answer);
-  return answer;
-}
-
-template <class uword> void BoolArray<uword>::append(const BoolArray &a) {
-  if (sizeinbits % wordinbits == 0) {
-    buffer.insert(buffer.end(), a.buffer.begin(), a.buffer.end());
-  } else {
-    throw std::invalid_argument(
-        "Cannot append if parent does not meet boundary");
-  }
-  sizeinbits += a.sizeinbits;
-}
-} // namespace ewah
-#endif
diff --git a/yt/utilities/lib/ewahboolarray/ewah-inl.h b/yt/utilities/lib/ewahboolarray/ewah-inl.h
deleted file mode 100644
index 892b32d6e1e..00000000000
--- a/yt/utilities/lib/ewahboolarray/ewah-inl.h
+++ /dev/null
@@ -1,1670 +0,0 @@
-#ifndef EWAH_INL_H
-#define EWAH_INL_H
-
-#include "ewah.h"
-
-namespace ewah {
-
-/**
- * computes the logical or (union) between "n" bitmaps (referenced by a
- * pointer).
- * The answer gets written out in container. This might be faster than calling
- * logicalor n-1 times.
- */
-template <class uword>
-void fast_logicalor_tocontainer(size_t n, const EWAHBoolArray<uword> **inputs,
-                                EWAHBoolArray<uword> &container);
-
-/**
- * computes the logical or (union) between "n" bitmaps (referenced by a
- * pointer).
- * Returns the answer. This might be faster than calling
- * logicalor n-1 times.
- */
-template <class uword>
-EWAHBoolArray<uword> fast_logicalor(size_t n,
-                                    const EWAHBoolArray<uword> **inputs) {
-  EWAHBoolArray<uword> answer;
-  fast_logicalor_tocontainer(n, inputs, answer);
-  return answer;
-}
-
-/**
- * Iterate over words of bits from a compressed bitmap.
- */
-template <class uword> class EWAHBoolArrayIterator {
-public:
-  /**
-   * is there a new word?
-   */
-  bool hasNext() const { return pointer < myparent.size(); }
-
-  /**
-   * return next word.
-   */
-  uword next() {
-    uword returnvalue;
-    if (compressedwords < rl) {
-      ++compressedwords;
-      if (b)
-        returnvalue = notzero;
-      else
-        returnvalue = zero;
-    } else {
-      ++literalwords;
-      ++pointer;
-      returnvalue = myparent[pointer];
-    }
-    if ((compressedwords == rl) && (literalwords == lw)) {
-      ++pointer;
-      if (pointer < myparent.size())
-        readNewRunningLengthWord();
-    }
-    return returnvalue;
-  }
-
-  EWAHBoolArrayIterator(const EWAHBoolArrayIterator<uword> &other)
-      : pointer(other.pointer), myparent(other.myparent),
-        compressedwords(other.compressedwords),
-        literalwords(other.literalwords), rl(other.rl), lw(other.lw),
-        b(other.b) {}
-
-  static const uword zero = 0;
-  static const uword notzero = static_cast<uword>(~zero);
-
-private:
-  EWAHBoolArrayIterator(const std::vector<uword> &parent);
-  void readNewRunningLengthWord();
-  friend class EWAHBoolArray<uword>;
-  size_t pointer;
-  const std::vector<uword> &myparent;
-  uword compressedwords;
-  uword literalwords;
-  uword rl, lw;
-  bool b;
-};
-
-/**
- * Used to go through the set bits. Not optimally fast, but convenient.
- */
-template <class uword> class EWAHBoolArraySetBitForwardIterator {
-public:
-  typedef std::forward_iterator_tag iterator_category;
-  typedef size_t *pointer;
-  typedef size_t &reference_type;
-  typedef size_t value_type;
-  typedef ptrdiff_t difference_type;
-  typedef EWAHBoolArraySetBitForwardIterator<uword> type_of_iterator;
-  /**
-   * Provides the location of the set bit.
-   */
-  inline size_t operator*() const { return answer; }
-
-  bool operator<(const type_of_iterator &o) const {
-    if (!o.hasValue)
-      return true;
-    if (!hasValue)
-      return false;
-    return answer < o.answer;
-  }
-
-  bool operator<=(const type_of_iterator &o) const {
-    if (!o.hasValue)
-      return true;
-    if (!hasValue)
-      return false;
-    return answer <= o.answer;
-  }
-
-  bool operator>(const type_of_iterator &o) const { return !((*this) <= o); }
-
-  bool operator>=(const type_of_iterator &o) const { return !((*this) < o); }
-
-  EWAHBoolArraySetBitForwardIterator &operator++() { //++i
-    if (hasNext)
-      next();
-    else
-      hasValue = false;
-    return *this;
-  }
-
-  EWAHBoolArraySetBitForwardIterator operator++(int) { // i++
-    EWAHBoolArraySetBitForwardIterator old(*this);
-    if (hasNext)
-      next();
-    else
-      hasValue = false;
-    return old;
-  }
-
-  bool operator==(const EWAHBoolArraySetBitForwardIterator<uword> &o) const {
-    if ((!hasValue) && (!o.hasValue))
-      return true;
-    return (hasValue == o.hasValue) && (answer == o.answer);
-  }
-
-  bool operator!=(const EWAHBoolArraySetBitForwardIterator<uword> &o) const {
-    return !(*this == o);
-  }
-
-  static EWAHBoolArraySetBitForwardIterator<uword> &end() {
-    static EWAHBoolArraySetBitForwardIterator<uword> e;
-    return e;
-  }
-
-  EWAHBoolArraySetBitForwardIterator(const std::vector<uword> *parent,
-                                     size_t startpointer = 0)
-      : word(0), position(0), runningLength(0), literalPosition(0),
-        wordPosition(startpointer), wordLength(0), buffer(parent),
-        hasNext(false), hasValue(false), answer(0) {
-    if (wordPosition < buffer->size()) {
-      setRunningLengthWord();
-      hasNext = moveToNext();
-      if (hasNext) {
-        next();
-        hasValue = true;
-      }
-    }
-  }
-
-  EWAHBoolArraySetBitForwardIterator()
-      : word(0), position(0), runningLength(0), literalPosition(0),
-        wordPosition(0), wordLength(0), buffer(NULL), hasNext(false),
-        hasValue(false), answer(0) {}
-
-  inline bool runningHasNext() const { return position < runningLength; }
-
-  inline bool literalHasNext() {
-    while (word == 0 && wordPosition < wordLength) {
-      word = (*buffer)[wordPosition++];
-      literalPosition = position;
-      position += WORD_IN_BITS;
-    }
-    return word != 0;
-  }
-
-  inline void setRunningLengthWord() {
-    uword rlw = (*buffer)[wordPosition];
-    runningLength =
-        (size_t)WORD_IN_BITS * RunningLengthWord<uword>::getRunningLength(rlw) +
-        position;
-    if (!RunningLengthWord<uword>::getRunningBit(rlw)) {
-      position = runningLength;
-    }
-    wordPosition++; // point to first literal word
-    wordLength =
-        static_cast<uword>(wordPosition + RunningLengthWord<uword>::getNumberOfLiteralWords(rlw));
-  }
-
-  inline bool moveToNext() {
-    while (!runningHasNext() && !literalHasNext()) {
-      if (wordPosition >= buffer->size()) {
-        return false;
-      }
-      setRunningLengthWord();
-    }
-    return true;
-  }
-
-  void next() { // update answer
-    if (runningHasNext()) {
-      answer = position++;
-      if (runningHasNext())
-        return;
-    } else {
-      uword t = static_cast<uword>(word & (~word + 1));
-      answer = literalPosition + countOnes((UWORD)(t - 1));
-      word ^= t;
-    }
-    hasNext = moveToNext();
-  }
-
-  enum { WORD_IN_BITS = sizeof(uword) * 8 };
-  uword word; // lit word
-  size_t position;
-  size_t runningLength;
-  size_t literalPosition;
-  size_t wordPosition; // points to word in buffer
-  uword wordLength;
-  const std::vector<uword> *buffer;
-  bool hasNext;
-  bool hasValue;
-  size_t answer;
-};
-
-/**
- * This object is returned by the compressed bitmap as a
- * statistical descriptor.
- */
-class BitmapStatistics {
-public:
-  BitmapStatistics()
-      : totalliteral(0), totalcompressed(0), runningwordmarker(0),
-        maximumofrunningcounterreached(0) {}
-  size_t getCompressedSize() const { return totalliteral + runningwordmarker; }
-  size_t getUncompressedSize() const { return totalliteral + totalcompressed; }
-  size_t getNumberOfDirtyWords() const { return totalliteral; }
-  size_t getNumberOfCleanWords() const { return totalcompressed; }
-  size_t getNumberOfMarkers() const { return runningwordmarker; }
-  size_t getOverRuns() const { return maximumofrunningcounterreached; }
-  size_t totalliteral;
-  size_t totalcompressed;
-  size_t runningwordmarker;
-  size_t maximumofrunningcounterreached;
-};
-
-template <class uword> bool EWAHBoolArray<uword>::set(size_t i) {
-  if (i < sizeinbits)
-    return false;
-  const size_t dist = (i + wordinbits) / wordinbits -
-                      (sizeinbits + wordinbits - 1) / wordinbits;
-  sizeinbits = i + 1;
-  if (dist > 0) { // easy
-    if (dist > 1) {
-      fastaddStreamOfEmptyWords(false, dist - 1);
-    }
-    addLiteralWord(
-        static_cast<uword>(static_cast<uword>(1) << (i % wordinbits)));
-    return true;
-  }
-  RunningLengthWord<uword> lastRunningLengthWord(buffer[lastRLW]);
-  if (lastRunningLengthWord.getNumberOfLiteralWords() == 0) {
-    lastRunningLengthWord.setRunningLength(
-        static_cast<uword>(lastRunningLengthWord.getRunningLength() - 1));
-    addLiteralWord(
-        static_cast<uword>(static_cast<uword>(1) << (i % wordinbits)));
-    return true;
-  }
-  buffer[buffer.size() - 1] |=
-      static_cast<uword>(static_cast<uword>(1) << (i % wordinbits));
-  // check if we just completed a stream of 1s
-  if (buffer[buffer.size() - 1] == static_cast<uword>(~0)) {
-    // we remove the last dirty word
-    buffer[buffer.size() - 1] = 0;
-    buffer.resize(buffer.size() - 1);
-    lastRunningLengthWord.setNumberOfLiteralWords(static_cast<uword>(
-        lastRunningLengthWord.getNumberOfLiteralWords() - 1));
-    // next we add one clean word
-    addEmptyWord(true);
-  }
-  return true;
-}
-
-template <class uword> void EWAHBoolArray<uword>::inplace_logicalnot() {
-  size_t pointer(0), lastrlw(0);
-  while (pointer < buffer.size()) {
-    RunningLengthWord<uword> rlw(buffer[pointer]);
-    lastrlw = pointer; // we save this up
-    if (rlw.getRunningBit())
-      rlw.setRunningBit(false);
-    else
-      rlw.setRunningBit(true);
-    ++pointer;
-    for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) {
-      buffer[pointer] = static_cast<uword>(~buffer[pointer]);
-      ++pointer;
-    }
-  }
-  if (sizeinbits % wordinbits != 0) {
-    RunningLengthWord<uword> rlw(buffer[lastrlw]);
-    const uword maskbogus =
-        static_cast<uword>((static_cast<uword>(1) << (sizeinbits % wordinbits)) - 1);
-    if (rlw.getNumberOfLiteralWords() > 0) { // easy case
-      buffer[lastrlw + 1 + rlw.getNumberOfLiteralWords() - 1] &= maskbogus;
-    } else {
-      rlw.setRunningLength(rlw.getRunningLength() - 1);
-      addLiteralWord(maskbogus);
-    }
-  }
-}
-
-template <class uword> size_t EWAHBoolArray<uword>::numberOfWords() const {
-  size_t tot(0);
-  size_t pointer(0);
-  while (pointer < buffer.size()) {
-    ConstRunningLengthWord<uword> rlw(buffer[pointer]);
-    tot += rlw.size();
-    pointer += 1 + rlw.getNumberOfLiteralWords();
-  }
-  return tot;
-}
-
-template <class uword>
-void EWAHBoolArray<uword>::assertWordCount(std::string message) const {
-#ifdef EWAHASSERT
-  size_t tot = numberOfWords();
-  size_t expected = (sizeinbits + wordinbits - 1) / wordinbits;
-  if (expected != tot) {
-    std::cerr << "[assertWordCount] wordinbits " << wordinbits << std::endl;
-    std::cerr << "[assertWordCount] sizeinbits " << sizeinbits << std::endl;
-    std::cerr << "[assertWordCount] " << message << std::endl;
-    std::cerr << "[assertWordCount] number of words " << tot << std::endl;
-    std::cerr << "[assertWordCount] expected number of words " << expected
-              << std::endl;
-    debugprintout();
-    throw std::runtime_error("bug");
-  }
-#endif
-}
-
-template <class uword> void EWAHBoolArray<uword>::correctWordCount() {
-  size_t tot = numberOfWords();
-  size_t expected = (sizeinbits + wordinbits - 1) / wordinbits;
-  if (expected != tot) {
-    if (tot < expected) {
-      fastaddStreamOfEmptyWords(false, expected - tot);
-    } else {
-      RunningLengthWord<uword> lastRunningLengthWord(buffer[lastRLW]);
-      lastRunningLengthWord.setRunningLength(static_cast<uword>(
-          lastRunningLengthWord.getRunningLength() + expected - tot));
-    }
-  }
-}
-
-template <class uword> size_t EWAHBoolArray<uword>::numberOfOnes() const {
-  size_t tot(0);
-  size_t pointer(0);
-  while (pointer < buffer.size()) {
-    ConstRunningLengthWord<uword> rlw(buffer[pointer]);
-    if (rlw.getRunningBit()) {
-      tot += static_cast<size_t>(rlw.getRunningLength() * wordinbits);
-    }
-    ++pointer;
-    for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) {
-      tot += countOnes((UWORD)buffer[pointer]);
-      ++pointer;
-    }
-  }
-  return tot;
-}
-
-template <class uword>
-std::vector<size_t> EWAHBoolArray<uword>::toArray() const {
-  std::vector<size_t> ans;
-  size_t pos(0);
-  size_t pointer(0);
-  const size_t buffersize = buffer.size();
-  while (pointer < buffersize) {
-    ConstRunningLengthWord<uword> rlw(buffer[pointer]);
-    const size_t productofrl =
-        static_cast<size_t>(rlw.getRunningLength() * wordinbits);
-    if (rlw.getRunningBit()) {
-      size_t upper_limit = pos + productofrl;
-      for (; pos < upper_limit; ++pos) {
-        ans.push_back(pos);
-      }
-    } else {
-      pos += productofrl;
-    }
-    ++pointer;
-    const size_t rlwlw = rlw.getNumberOfLiteralWords();
-    for (size_t k = 0; k < rlwlw; ++k) {
-      uword myword = buffer[pointer];
-      while (myword != 0) {
-        uint64_t t = myword & (~myword + 1);
-        uint32_t r = numberOfTrailingZeros(t);
-        ans.push_back(pos + r);
-        myword ^= t;
-      }
-      pos += wordinbits;
-      ++pointer;
-    }
-  }
-  return ans;
-}
-
-template <class uword>
-void EWAHBoolArray<uword>::logicalnot(EWAHBoolArray &x) const {
-  x.reset();
-  x.buffer.reserve(buffer.size());
-  EWAHBoolArrayRawIterator<uword> i = this->raw_iterator();
-  if (!i.hasNext())
-    return; // nothing to do
-  while (true) {
-    BufferedRunningLengthWord<uword> &rlw = i.next();
-    if (i.hasNext()) {
-      if (rlw.getRunningLength() > 0)
-        x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(),
-                                    rlw.getRunningLength());
-      if (rlw.getNumberOfLiteralWords() > 0) {
-        const uword *dw = i.dirtyWords();
-        for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) {
-          x.addLiteralWord(~dw[k]);
-        }
-      }
-    } else {
-      if (rlw.getNumberOfLiteralWords() == 0) {
-        if ((this->sizeinbits % wordinbits != 0) && !rlw.getRunningBit()) {
-          if (rlw.getRunningLength() > 1)
-            x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(),
-                                        rlw.getRunningLength() - 1);
-          const uword maskbogus =
-              static_cast<uword>((static_cast<uword>(1) << (this->sizeinbits % wordinbits)) - 1);
-          x.addLiteralWord(maskbogus);
-          break;
-        } else {
-          if (rlw.getRunningLength() > 0)
-            x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(),
-                                        rlw.getRunningLength());
-          break;
-        }
-      }
-      if (rlw.getRunningLength() > 0)
-        x.fastaddStreamOfEmptyWords(!rlw.getRunningBit(),
-                                    rlw.getRunningLength());
-      const uword *dw = i.dirtyWords();
-      for (size_t k = 0; k + 1 < rlw.getNumberOfLiteralWords(); ++k) {
-        x.addLiteralWord(~dw[k]);
-      }
-      const uword maskbogus =
-          (this->sizeinbits % wordinbits != 0)
-              ? static_cast<uword>((static_cast<uword>(1) << (this->sizeinbits % wordinbits)) - 1)
-              : ~static_cast<uword>(0);
-      x.addLiteralWord(static_cast<uword>((~dw[rlw.getNumberOfLiteralWords() - 1]) & maskbogus));
-      break;
-    }
-  }
-  x.sizeinbits = this->sizeinbits;
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::addWord(const uword newdata,
-                                     const uint32_t bitsthatmatter) {
-  sizeinbits += bitsthatmatter;
-  if (newdata == 0) {
-    return addEmptyWord(0);
-  } else if (newdata == static_cast<uword>(~0)) {
-    return addEmptyWord(1);
-  } else {
-    return addLiteralWord(newdata);
-  }
-}
-
-template <class uword>
-inline void EWAHBoolArray<uword>::writeBuffer(std::ostream &out) const {
-  if (!buffer.empty())
-    out.write(reinterpret_cast<const char *>(&buffer[0]),
-              sizeof(uword) * buffer.size());
-}
-
-template <class uword>
-inline void EWAHBoolArray<uword>::readBuffer(std::istream &in,
-                                             const size_t buffersize) {
-  buffer.resize(buffersize);
-  if (buffersize > 0)
-    in.read(reinterpret_cast<char *>(&buffer[0]), sizeof(uword) * buffersize);
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::write(std::ostream &out,
-                                   const bool savesizeinbits) const {
-  size_t written = 0;
-  if (savesizeinbits) {
-    uint64_t sb = static_cast<uint64_t>(sizeinbits);
-    out.write(reinterpret_cast<const char *>(&sb), sizeof(sb));
-    written += sizeof(uint64_t);
-  }
-  const size_t buffersize = buffer.size();
-  uint64_t bs = static_cast<uint64_t>(buffersize);
-  out.write(reinterpret_cast<const char *>(&bs), sizeof(bs));
-  written += sizeof(uint64_t);
-
-  if (buffersize > 0) {
-    out.write(reinterpret_cast<const char *>(&buffer[0]),
-              static_cast<std::streamsize>(sizeof(uword) * buffersize));
-    written += sizeof(uword) * buffersize;
-  }
-  return written;
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::write(char *out, size_t capacity,
-                                   const bool savesizeinbits) const {
-  size_t written = 0;
-  if (savesizeinbits) {
-    uint64_t sb = static_cast<uint64_t>(sizeinbits);
-    if (capacity < sizeof(sb))
-      return 0;
-    capacity -= sizeof(sb);
-    memcpy(out, &sb, sizeof(sb));
-    out += sizeof(sb);
-    written += sizeof(uint64_t);
-  }
-  const size_t buffersize = buffer.size();
-  uint64_t bs = static_cast<uint64_t>(buffersize);
-  if (capacity < sizeof(bs))
-    return 0;
-  capacity -= sizeof(bs);
-  memcpy(out, &buffersize, sizeof(bs));
-  out += sizeof(bs);
-  written += sizeof(uint64_t);
-
-  if (buffersize > 0) {
-    if (capacity < sizeof(uword) * buffersize)
-      return 0;
-    memcpy(out, &buffer[0], sizeof(uword) * buffersize);
-    written += sizeof(uword) * buffersize;
-  }
-  return written;
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::read(std::istream &in, const bool savesizeinbits) {
-  size_t read = 0;
-  if (savesizeinbits) {
-    uint64_t tmp;
-    in.read(reinterpret_cast<char *>(&tmp), sizeof(tmp));
-    read += sizeof(tmp);
-    sizeinbits = static_cast<size_t>(tmp);
-  } else {
-    sizeinbits = 0;
-  }
-  size_t buffersize(0);
-  uint64_t tmp;
-  in.read(reinterpret_cast<char *>(&tmp), sizeof(tmp));
-  read += sizeof(tmp);
-  buffersize = static_cast<size_t>(tmp);
-  buffer.resize(buffersize);
-  if (buffersize > 0) {
-    in.read(reinterpret_cast<char *>(&buffer[0]),
-            static_cast<std::streamsize>(sizeof(uword) * buffersize));
-    read += sizeof(uword) * buffersize;
-  }
-  return read;
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::read(const char *in, size_t capacity,
-                                  const bool savesizeinbits) {
-  size_t read = 0;
-  if (savesizeinbits) {
-    uint64_t tmp;
-    if (capacity < sizeof(tmp))
-      return 0;
-    capacity -= sizeof(tmp);
-    memcpy(reinterpret_cast<char *>(&tmp), in, sizeof(tmp));
-    read += sizeof(tmp);
-    in += sizeof(tmp);
-    sizeinbits = static_cast<size_t>(tmp);
-  } else {
-    sizeinbits = 0;
-  }
-  size_t buffersize(0);
-  uint64_t tmp;
-  if (capacity < sizeof(uint64_t))
-    return 0;
-  capacity -= sizeof(uint64_t);
-  memcpy(reinterpret_cast<char *>(&tmp), in, sizeof(uint64_t));
-  in += sizeof(uint64_t);
-  read += sizeof(uint64_t);
-  buffersize = static_cast<size_t>(tmp);
-  buffer.resize(buffersize);
-  if (buffersize > 0) {
-    if (capacity < sizeof(uword) * buffersize)
-      return 0;
-    memcpy(&buffer[0], in, sizeof(uword) * buffersize);
-    read += sizeof(uword) * buffersize;
-  }
-  return read;
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::addLiteralWord(const uword newdata) {
-  RunningLengthWord<uword> lastRunningLengthWord(buffer[lastRLW]);
-  uword numbersofar = lastRunningLengthWord.getNumberOfLiteralWords();
-  if (numbersofar >=
-      RunningLengthWord<uword>::largestliteralcount) { // 0x7FFF) {
-    buffer.push_back(0);
-    lastRLW = buffer.size() - 1;
-    RunningLengthWord<uword> lastRunningLengthWord2(buffer[lastRLW]);
-    lastRunningLengthWord2.setNumberOfLiteralWords(1);
-    buffer.push_back(newdata);
-    return 2;
-  }
-  lastRunningLengthWord.setNumberOfLiteralWords(
-      static_cast<uword>(numbersofar + 1));
-  buffer.push_back(newdata);
-  return 1;
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::padWithZeroes(const size_t totalbits) {
-  size_t wordsadded = 0;
-  if (totalbits <= sizeinbits)
-    return wordsadded;
-
-  size_t missingbits = totalbits - sizeinbits;
-
-  RunningLengthWord<uword> rlw(buffer[lastRLW]);
-  if (rlw.getNumberOfLiteralWords() > 0) {
-    // Consume trailing zeroes of trailing literal word (past sizeinbits)
-    size_t remain = sizeinbits % wordinbits;
-    if (remain > 0) // Is last word partial?
-    {
-      size_t avail = wordinbits - remain;
-      if (avail > 0) {
-        if (missingbits > avail) {
-          missingbits -= avail;
-        } else {
-          missingbits = 0;
-        }
-        sizeinbits += avail;
-      }
-    }
-  }
-
-  if (missingbits > 0) {
-    size_t wordstoadd = missingbits / wordinbits;
-    if ((missingbits % wordinbits) != 0)
-      ++wordstoadd;
-
-    wordsadded = addStreamOfEmptyWords(false, wordstoadd);
-  }
-  sizeinbits = totalbits;
-  return wordsadded;
-}
-
-/**
- * This is a low-level iterator.
- */
-
-template <class uword = uint32_t> class EWAHBoolArrayRawIterator {
-public:
-  EWAHBoolArrayRawIterator(const EWAHBoolArray<uword> &p)
-      : pointer(0), myparent(&p.getBuffer()), rlw((*myparent)[pointer], this) {}
-  EWAHBoolArrayRawIterator(const EWAHBoolArrayRawIterator &o)
-      : pointer(o.pointer), myparent(o.myparent), rlw(o.rlw) {}
-
-  bool hasNext() const { return pointer < myparent->size(); }
-
-  BufferedRunningLengthWord<uword> &next() {
-    rlw.read((*myparent)[pointer]);
-    pointer = static_cast<size_t>(pointer + rlw.getNumberOfLiteralWords() + 1);
-    return rlw;
-  }
-
-  const uword *dirtyWords() const {
-    return myparent->data() +
-           static_cast<size_t>(pointer - rlw.getNumberOfLiteralWords());
-  }
-
-  EWAHBoolArrayRawIterator &operator=(const EWAHBoolArrayRawIterator &other) {
-    pointer = other.pointer;
-    myparent = other.myparent;
-    rlw = other.rlw;
-    return *this;
-  }
-
-  size_t pointer;
-  const std::vector<uword> *myparent;
-  BufferedRunningLengthWord<uword> rlw;
-
-  EWAHBoolArrayRawIterator();
-};
-
-template <class uword>
-EWAHBoolArrayIterator<uword> EWAHBoolArray<uword>::uncompress() const {
-  return EWAHBoolArrayIterator<uword>(buffer);
-}
-
-template <class uword>
-EWAHBoolArrayRawIterator<uword> EWAHBoolArray<uword>::raw_iterator() const {
-  return EWAHBoolArrayRawIterator<uword>(*this);
-}
-
-template <class uword>
-bool EWAHBoolArray<uword>::operator==(const EWAHBoolArray &x) const {
-  EWAHBoolArrayRawIterator<uword> i = x.raw_iterator();
-  EWAHBoolArrayRawIterator<uword> j = raw_iterator();
-  if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
-    return (i.hasNext() == false) && (j.hasNext() == false);
-  }
-  // at this point, this should be safe:
-  BufferedRunningLengthWord<uword> &rlwi = i.next();
-  BufferedRunningLengthWord<uword> &rlwj = j.next();
-
-  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
-    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
-      const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
-      BufferedRunningLengthWord<uword> &prey = i_is_prey ? rlwi : rlwj;
-      BufferedRunningLengthWord<uword> &predator = i_is_prey ? rlwj : rlwi;
-      size_t index = 0;
-      const bool nonzero =
-          ((!predator.getRunningBit())
-               ? prey.nonzero_discharge(predator.getRunningLength(), index)
-               : prey.nonzero_dischargeNegated(predator.getRunningLength(),
-                                               index));
-      if (nonzero) {
-        return false;
-      }
-      if (predator.getRunningLength() - index > 0) {
-        if (predator.getRunningBit()) {
-          return false;
-        }
-      }
-      predator.discardRunningWordsWithReload();
-    }
-    const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
-                                         rlwj.getNumberOfLiteralWords());
-    if (nbre_literal > 0) {
-      for (size_t k = 0; k < nbre_literal; ++k)
-        if ((rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)) != 0)
-          return false;
-      rlwi.discardLiteralWordsWithReload(nbre_literal);
-      rlwj.discardLiteralWordsWithReload(nbre_literal);
-    }
-  }
-  const bool i_remains = rlwi.size() > 0;
-  BufferedRunningLengthWord<uword> &remaining = i_remains ? rlwi : rlwj;
-  return !remaining.nonzero_discharge();
-}
-
-template <class uword> void EWAHBoolArray<uword>::swap(EWAHBoolArray &x) {
-  buffer.swap(x.buffer);
-  size_t tmp = x.sizeinbits;
-  x.sizeinbits = sizeinbits;
-  sizeinbits = tmp;
-  tmp = x.lastRLW;
-  x.lastRLW = lastRLW;
-  lastRLW = tmp;
-}
-
-template <class uword>
-void EWAHBoolArray<uword>::append(const EWAHBoolArray &x) {
-  if (sizeinbits % wordinbits == 0) {
-    // hoping for the best?
-    sizeinbits += x.sizeinbits;
-    ConstRunningLengthWord<uword> lRLW(buffer[lastRLW]);
-    if ((lRLW.getRunningLength() == 0) &&
-        (lRLW.getNumberOfLiteralWords() == 0)) {
-      // it could be that the running length word is empty, in such a case,
-      // we want to get rid of it!
-      lastRLW = x.lastRLW + buffer.size() - 1;
-      buffer.resize(buffer.size() - 1);
-      buffer.insert(buffer.end(), x.buffer.begin(), x.buffer.end());
-    } else {
-      lastRLW = x.lastRLW + buffer.size();
-      buffer.insert(buffer.end(), x.buffer.begin(), x.buffer.end());
-    }
-  } else {
-    std::stringstream ss;
-    ss << "This should really not happen! You are trying to append to a bitmap "
-          "having a fractional number of words, that is,  "
-       << static_cast<int>(sizeinbits) << " bits with a word size in bits of "
-       << static_cast<int>(wordinbits) << ". ";
-    ss << "Size of the bitmap being appended: " << x.sizeinbits << " bits."
-       << std::endl;
-    throw std::invalid_argument(ss.str());
-  }
-}
-
-template <class uword>
-EWAHBoolArrayIterator<uword>::EWAHBoolArrayIterator(
-    const std::vector<uword> &parent)
-    : pointer(0), myparent(parent), compressedwords(0), literalwords(0), rl(0),
-      lw(0), b(0) {
-  if (pointer < myparent.size())
-    readNewRunningLengthWord();
-}
-
-template <class uword>
-void EWAHBoolArrayIterator<uword>::readNewRunningLengthWord() {
-  literalwords = 0;
-  compressedwords = 0;
-  ConstRunningLengthWord<uword> rlw(myparent[pointer]);
-  rl = rlw.getRunningLength();
-  lw = rlw.getNumberOfLiteralWords();
-  b = rlw.getRunningBit();
-  if ((rl == 0) && (lw == 0)) {
-    if (pointer < myparent.size() - 1) {
-      ++pointer;
-      readNewRunningLengthWord();
-    } else {
-      pointer = myparent.size();
-    }
-  }
-}
-
-template <class uword>
-BoolArray<uword> EWAHBoolArray<uword>::toBoolArray() const {
-  BoolArray<uword> ans(sizeinbits);
-  EWAHBoolArrayIterator<uword> i = uncompress();
-  size_t counter = 0;
-  while (i.hasNext()) {
-    ans.setWord(counter++, i.next());
-  }
-  return ans;
-}
-
-template <class uword>
-template <class container>
-void EWAHBoolArray<uword>::appendSetBits(container &out,
-                                         const size_t offset) const {
-  size_t pointer(0);
-  size_t currentoffset(offset);
-  if (RESERVEMEMORY)
-    out.reserve(buffer.size() + 64); // trading memory for speed.
-  const size_t buffersize = buffer.size();
-  while (pointer < buffersize) {
-    ConstRunningLengthWord<uword> rlw(buffer[pointer]);
-    const size_t productofrl =
-        static_cast<size_t>(rlw.getRunningLength() * wordinbits);
-    if (rlw.getRunningBit()) {
-      const size_t upper_limit = currentoffset + productofrl;
-      for (; currentoffset < upper_limit; ++currentoffset) {
-        out.push_back(currentoffset);
-      }
-    } else {
-      currentoffset += productofrl;
-    }
-    ++pointer;
-    const size_t rlwlw = rlw.getNumberOfLiteralWords();
-    for (uword k = 0; k < rlwlw; ++k) {
-      uword currentword = buffer[pointer];
-      while (currentword != 0) {
-        uword t = static_cast<uword>(currentword & (~currentword+1));
-        uint32_t r = numberOfTrailingZeros(t);
-        out.push_back(currentoffset + r);
-        currentword ^= t;
-      }
-      currentoffset += wordinbits;
-      ++pointer;
-    }
-  }
-}
-
-template <class uword>
-bool EWAHBoolArray<uword>::operator!=(const EWAHBoolArray<uword> &x) const {
-  return !(*this == x);
-}
-
-template <class uword>
-bool EWAHBoolArray<uword>::operator==(const BoolArray<uword> &x) const {
-  // could be more efficient
-  return (this->toBoolArray() == x);
-}
-
-template <class uword>
-bool EWAHBoolArray<uword>::operator!=(const BoolArray<uword> &x) const {
-  // could be more efficient
-  return (this->toBoolArray() != x);
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::addStreamOfEmptyWords(const bool v,
-                                                   size_t number) {
-  if (number == 0)
-    return 0;
-  sizeinbits += number * wordinbits;
-  size_t wordsadded = 0;
-  if ((RunningLengthWord<uword>::getRunningBit(buffer[lastRLW]) != v) &&
-      (RunningLengthWord<uword>::size(buffer[lastRLW]) == 0)) {
-    RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
-  } else if ((RunningLengthWord<uword>::getNumberOfLiteralWords(
-                  buffer[lastRLW]) != 0) ||
-             (RunningLengthWord<uword>::getRunningBit(buffer[lastRLW]) != v)) {
-    buffer.push_back(0);
-    ++wordsadded;
-    lastRLW = buffer.size() - 1;
-    if (v)
-      RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
-  }
-  const uword runlen =
-      RunningLengthWord<uword>::getRunningLength(buffer[lastRLW]);
-
-  const uword whatwecanadd =
-      number < static_cast<size_t>(
-                   RunningLengthWord<uword>::largestrunninglengthcount - runlen)
-          ? static_cast<uword>(number)
-          : static_cast<uword>(
-                RunningLengthWord<uword>::largestrunninglengthcount - runlen);
-  RunningLengthWord<uword>::setRunningLength(
-      buffer[lastRLW], static_cast<uword>(runlen + whatwecanadd));
-
-  number -= static_cast<size_t>(whatwecanadd);
-  while (number >= RunningLengthWord<uword>::largestrunninglengthcount) {
-    buffer.push_back(0);
-    ++wordsadded;
-    lastRLW = buffer.size() - 1;
-    if (v)
-      RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
-    RunningLengthWord<uword>::setRunningLength(
-        buffer[lastRLW], RunningLengthWord<uword>::largestrunninglengthcount);
-    number -= static_cast<size_t>(
-        RunningLengthWord<uword>::largestrunninglengthcount);
-  }
-  if (number > 0) {
-    buffer.push_back(0);
-    ++wordsadded;
-    lastRLW = buffer.size() - 1;
-    if (v)
-      RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
-    RunningLengthWord<uword>::setRunningLength(buffer[lastRLW],
-                                               static_cast<uword>(number));
-  }
-  return wordsadded;
-}
-
-template <class uword>
-void EWAHBoolArray<uword>::fastaddStreamOfEmptyWords(const bool v,
-                                                     size_t number) {
-  if (number == 0)
-    return;
-  if ((RunningLengthWord<uword>::getRunningBit(buffer[lastRLW]) != v) &&
-      (RunningLengthWord<uword>::size(buffer[lastRLW]) == 0)) {
-    RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
-  } else if ((RunningLengthWord<uword>::getNumberOfLiteralWords(
-                  buffer[lastRLW]) != 0) ||
-             (RunningLengthWord<uword>::getRunningBit(buffer[lastRLW]) != v)) {
-    buffer.push_back(0);
-    lastRLW = buffer.size() - 1;
-    if (v)
-      RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
-  }
-  const uword runlen =
-      RunningLengthWord<uword>::getRunningLength(buffer[lastRLW]);
-
-  const uword whatwecanadd =
-      number < static_cast<size_t>(
-                   RunningLengthWord<uword>::largestrunninglengthcount - runlen)
-          ? static_cast<uword>(number)
-          : static_cast<uword>(
-                RunningLengthWord<uword>::largestrunninglengthcount - runlen);
-  RunningLengthWord<uword>::setRunningLength(
-      buffer[lastRLW], static_cast<uword>(runlen + whatwecanadd));
-
-  number -= static_cast<size_t>(whatwecanadd);
-  while (number >= RunningLengthWord<uword>::largestrunninglengthcount) {
-    buffer.push_back(0);
-    lastRLW = buffer.size() - 1;
-    if (v)
-      RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
-    RunningLengthWord<uword>::setRunningLength(
-        buffer[lastRLW], RunningLengthWord<uword>::largestrunninglengthcount);
-    number -= static_cast<size_t>(
-        RunningLengthWord<uword>::largestrunninglengthcount);
-  }
-  if (number > 0) {
-    buffer.push_back(0);
-    lastRLW = buffer.size() - 1;
-    if (v)
-      RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
-    RunningLengthWord<uword>::setRunningLength(buffer[lastRLW],
-                                               static_cast<uword>(number));
-  }
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::addStreamOfDirtyWords(const uword *v,
-                                                   const size_t number) {
-  if (number == 0)
-    return 0;
-  uword rlw = buffer[lastRLW];
-  size_t NumberOfLiteralWords =
-      RunningLengthWord<uword>::getNumberOfLiteralWords(rlw);
-  if (NumberOfLiteralWords + number <=
-      RunningLengthWord<uword>::largestliteralcount) {
-    RunningLengthWord<uword>::setNumberOfLiteralWords(
-        rlw, static_cast<uword>(NumberOfLiteralWords + number));
-    buffer[lastRLW] = rlw;
-    sizeinbits += number * wordinbits;
-    buffer.insert(buffer.end(), v, v + number);
-    return number;
-  }
-  // we proceed the long way
-  size_t howmanywecanadd =
-      RunningLengthWord<uword>::largestliteralcount - NumberOfLiteralWords;
-  RunningLengthWord<uword>::setNumberOfLiteralWords(
-      rlw, RunningLengthWord<uword>::largestliteralcount);
-  buffer[lastRLW] = rlw;
-  buffer.insert(buffer.end(), v, v + howmanywecanadd);
-  size_t wordadded = howmanywecanadd;
-  sizeinbits += howmanywecanadd * wordinbits;
-  buffer.push_back(0);
-  lastRLW = buffer.size() - 1;
-  ++wordadded;
-  wordadded +=
-      addStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd);
-  return wordadded;
-}
-
-template <class uword>
-void EWAHBoolArray<uword>::fastaddStreamOfDirtyWords(const uword *v,
-                                                     const size_t number) {
-  if (number == 0)
-    return;
-  uword rlw = buffer[lastRLW];
-  size_t NumberOfLiteralWords =
-      RunningLengthWord<uword>::getNumberOfLiteralWords(rlw);
-  if (NumberOfLiteralWords + number <=
-      RunningLengthWord<uword>::largestliteralcount) {
-    RunningLengthWord<uword>::setNumberOfLiteralWords(
-        rlw, static_cast<uword>(NumberOfLiteralWords + number));
-    buffer[lastRLW] = rlw;
-    for (size_t i = 0; i < number; ++i)
-      buffer.push_back(v[i]);
-    // buffer.insert(buffer.end(), v, v+number); // seems slower than push_back?
-    return;
-  }
-  // we proceed the long way
-  size_t howmanywecanadd =
-      RunningLengthWord<uword>::largestliteralcount - NumberOfLiteralWords;
-  RunningLengthWord<uword>::setNumberOfLiteralWords(
-      rlw, RunningLengthWord<uword>::largestliteralcount);
-  buffer[lastRLW] = rlw;
-  for (size_t i = 0; i < howmanywecanadd; ++i)
-    buffer.push_back(v[i]);
-  // buffer.insert(buffer.end(), v, v+howmanywecanadd);// seems slower than
-  // push_back?
-  buffer.push_back(0);
-  lastRLW = buffer.size() - 1;
-  fastaddStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd);
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::addStreamOfNegatedDirtyWords(const uword *v,
-                                                          const size_t number) {
-  if (number == 0)
-    return 0;
-  uword rlw = buffer[lastRLW];
-  size_t NumberOfLiteralWords =
-      RunningLengthWord<uword>::getNumberOfLiteralWords(rlw);
-  if (NumberOfLiteralWords + number <=
-      RunningLengthWord<uword>::largestliteralcount) {
-    RunningLengthWord<uword>::setNumberOfLiteralWords(
-        rlw, static_cast<uword>(NumberOfLiteralWords + number));
-    buffer[lastRLW] = rlw;
-    sizeinbits += number * wordinbits;
-    for (size_t k = 0; k < number; ++k)
-      buffer.push_back(~v[k]);
-    return number;
-  }
-  // we proceed the long way
-  size_t howmanywecanadd =
-      RunningLengthWord<uword>::largestliteralcount - NumberOfLiteralWords;
-  RunningLengthWord<uword>::setNumberOfLiteralWords(
-      rlw, RunningLengthWord<uword>::largestliteralcount);
-  buffer[lastRLW] = rlw;
-  for (size_t k = 0; k < howmanywecanadd; ++k)
-    buffer.push_back(~v[k]);
-  size_t wordadded = howmanywecanadd;
-  sizeinbits += howmanywecanadd * wordinbits;
-  buffer.push_back(0);
-  lastRLW = buffer.size() - 1;
-  ++wordadded;
-  wordadded +=
-      addStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd);
-  return wordadded;
-}
-
-template <class uword> size_t EWAHBoolArray<uword>::addEmptyWord(const bool v) {
-  RunningLengthWord<uword> lastRunningLengthWord(buffer[lastRLW]);
-  const bool noliteralword =
-      (lastRunningLengthWord.getNumberOfLiteralWords() == 0);
-  // first, if the last running length word is empty, we align it
-  // this
-  uword runlen = lastRunningLengthWord.getRunningLength();
-  if ((noliteralword) && (runlen == 0)) {
-    lastRunningLengthWord.setRunningBit(v);
-  }
-  if ((noliteralword) && (lastRunningLengthWord.getRunningBit() == v) &&
-      (runlen < RunningLengthWord<uword>::largestrunninglengthcount)) {
-    lastRunningLengthWord.setRunningLength(static_cast<uword>(runlen + 1));
-    return 0;
-  } else {
-    // we have to start anew
-    buffer.push_back(0);
-    lastRLW = buffer.size() - 1;
-    RunningLengthWord<uword> lastRunningLengthWord2(buffer[lastRLW]);
-    lastRunningLengthWord2.setRunningBit(v);
-    lastRunningLengthWord2.setRunningLength(1);
-    return 1;
-  }
-}
-
-template <class uword>
-void fast_logicalor_tocontainer(size_t n, const EWAHBoolArray<uword> **inputs,
-                                EWAHBoolArray<uword> &container) {
-  class EWAHBoolArrayPtr {
-
-  public:
-    EWAHBoolArrayPtr(const EWAHBoolArray<uword> *p, bool o) : ptr(p), own(o) {}
-    const EWAHBoolArray<uword> *ptr;
-    bool own; // whether to clean
-
-    bool operator<(const EWAHBoolArrayPtr &o) const {
-      return o.ptr->sizeInBytes() < ptr->sizeInBytes(); // backward on purpose
-    }
-  };
-
-  if (n == 0) {
-    container.reset();
-    return;
-  }
-  if (n == 1) {
-    container = *inputs[0];
-    return;
-  }
-  std::priority_queue<EWAHBoolArrayPtr> pq;
-  for (size_t i = 0; i < n; i++) {
-    // could use emplace
-    pq.push(EWAHBoolArrayPtr(inputs[i], false));
-  }
-  while (pq.size() > 2) {
-
-    EWAHBoolArrayPtr x1 = pq.top();
-    pq.pop();
-
-    EWAHBoolArrayPtr x2 = pq.top();
-    pq.pop();
-
-    EWAHBoolArray<uword> *buffer = new EWAHBoolArray<uword>();
-    x1.ptr->logicalor(*x2.ptr, *buffer);
-
-    if (x1.own) {
-      delete x1.ptr;
-    }
-    if (x2.own) {
-      delete x2.ptr;
-    }
-    pq.push(EWAHBoolArrayPtr(buffer, true));
-  }
-  EWAHBoolArrayPtr x1 = pq.top();
-  pq.pop();
-
-  EWAHBoolArrayPtr x2 = pq.top();
-  pq.pop();
-
-  x1.ptr->logicalor(*x2.ptr, container);
-
-  if (x1.own) {
-    delete x1.ptr;
-  }
-  if (x2.own) {
-    delete x2.ptr;
-  }
-}
-
-template <class uword>
-void EWAHBoolArray<uword>::logicalor(const EWAHBoolArray &a,
-                                     EWAHBoolArray &container) const {
-  container.reset();
-  if (RESERVEMEMORY)
-    container.buffer.reserve(buffer.size() + a.buffer.size());
-  EWAHBoolArrayRawIterator<uword> i = a.raw_iterator();
-  EWAHBoolArrayRawIterator<uword> j = raw_iterator();
-  if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
-    container.setSizeInBits(sizeInBits());
-    return;
-  }
-  // at this point, this should be safe:
-  BufferedRunningLengthWord<uword> &rlwi = i.next();
-  BufferedRunningLengthWord<uword> &rlwj = j.next();
-
-  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
-    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
-      const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
-      BufferedRunningLengthWord<uword> &prey = i_is_prey ? rlwi : rlwj;
-      BufferedRunningLengthWord<uword> &predator = i_is_prey ? rlwj : rlwi;
-      if (predator.getRunningBit()) {
-        container.fastaddStreamOfEmptyWords(true, predator.getRunningLength());
-        prey.discardFirstWordsWithReload(predator.getRunningLength());
-      } else {
-        const size_t index =
-            prey.discharge(container, predator.getRunningLength());
-        container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() -
-                                                       index);
-      }
-      predator.discardRunningWordsWithReload();
-    }
-
-    const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
-                                         rlwj.getNumberOfLiteralWords());
-    if (nbre_literal > 0) {
-      for (size_t k = 0; k < nbre_literal; ++k) {
-        container.addWord(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k));
-      }
-      rlwi.discardLiteralWordsWithReload(nbre_literal);
-      rlwj.discardLiteralWordsWithReload(nbre_literal);
-    }
-  }
-  const bool i_remains = rlwi.size() > 0;
-  BufferedRunningLengthWord<uword> &remaining = i_remains ? rlwi : rlwj;
-  remaining.discharge(container);
-  container.setSizeInBits(sizeInBits() > a.sizeInBits() ? sizeInBits()
-                                                        : a.sizeInBits());
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::logicalorcount(const EWAHBoolArray &a) const {
-  size_t answer = 0;
-  EWAHBoolArrayRawIterator<uword> i = a.raw_iterator();
-  EWAHBoolArrayRawIterator<uword> j = raw_iterator();
-  if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
-    return 0;
-  }
-  // at this point, this should be safe:
-  BufferedRunningLengthWord<uword> &rlwi = i.next();
-  BufferedRunningLengthWord<uword> &rlwj = j.next();
-
-  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
-    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
-      const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
-      BufferedRunningLengthWord<uword> &prey = i_is_prey ? rlwi : rlwj;
-      BufferedRunningLengthWord<uword> &predator = i_is_prey ? rlwj : rlwi;
-      if (predator.getRunningBit()) {
-        answer += predator.getRunningLength() * wordinbits;
-        prey.discardFirstWordsWithReload(predator.getRunningLength());
-
-      } else {
-        // const size_t index =
-        prey.dischargeCount(predator.getRunningLength(), &answer);
-      }
-      predator.discardRunningWordsWithReload();
-    }
-
-    const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
-                                         rlwj.getNumberOfLiteralWords());
-    if (nbre_literal > 0) {
-      for (size_t k = 0; k < nbre_literal; ++k) {
-        answer += countOnes(
-            (uword)(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k)));
-      }
-      rlwi.discardLiteralWordsWithReload(nbre_literal);
-      rlwj.discardLiteralWordsWithReload(nbre_literal);
-    }
-  }
-  const bool i_remains = rlwi.size() > 0;
-  BufferedRunningLengthWord<uword> &remaining = i_remains ? rlwi : rlwj;
-  answer += remaining.dischargeCount();
-  return answer;
-}
-
-template <class uword>
-void EWAHBoolArray<uword>::logicalxor(const EWAHBoolArray &a,
-                                      EWAHBoolArray &container) const {
-  container.reset();
-  if (RESERVEMEMORY)
-    container.buffer.reserve(buffer.size() + a.buffer.size());
-  EWAHBoolArrayRawIterator<uword> i = a.raw_iterator();
-  EWAHBoolArrayRawIterator<uword> j = raw_iterator();
-  if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
-    container.setSizeInBits(sizeInBits());
-    return;
-  }
-  // at this point, this should be safe:
-  BufferedRunningLengthWord<uword> &rlwi = i.next();
-  BufferedRunningLengthWord<uword> &rlwj = j.next();
-  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
-    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
-      const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
-      BufferedRunningLengthWord<uword> &prey = i_is_prey ? rlwi : rlwj;
-      BufferedRunningLengthWord<uword> &predator = i_is_prey ? rlwj : rlwi;
-      const size_t index =
-          (!predator.getRunningBit())
-              ? prey.discharge(container, predator.getRunningLength())
-              : prey.dischargeNegated(container, predator.getRunningLength());
-      container.fastaddStreamOfEmptyWords(predator.getRunningBit(),
-                                          predator.getRunningLength() - index);
-      predator.discardRunningWordsWithReload();
-    }
-    const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
-                                         rlwj.getNumberOfLiteralWords());
-    if (nbre_literal > 0) {
-      for (size_t k = 0; k < nbre_literal; ++k)
-        container.addWord(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k));
-      rlwi.discardLiteralWordsWithReload(nbre_literal);
-      rlwj.discardLiteralWordsWithReload(nbre_literal);
-    }
-  }
-  const bool i_remains = rlwi.size() > 0;
-  BufferedRunningLengthWord<uword> &remaining = i_remains ? rlwi : rlwj;
-  remaining.discharge(container);
-  container.setSizeInBits(sizeInBits() > a.sizeInBits() ? sizeInBits()
-                                                        : a.sizeInBits());
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::logicalxorcount(const EWAHBoolArray &a) const {
-  EWAHBoolArrayRawIterator<uword> i = a.raw_iterator();
-  EWAHBoolArrayRawIterator<uword> j = raw_iterator();
-  if (!i.hasNext())
-    return a.numberOfOnes();
-  if (!j.hasNext())
-    return this->numberOfOnes();
-
-  size_t answer = 0;
-
-  // at this point, this should be safe:
-  BufferedRunningLengthWord<uword> &rlwi = i.next();
-  BufferedRunningLengthWord<uword> &rlwj = j.next();
-  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
-    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
-      const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
-      BufferedRunningLengthWord<uword> &prey = i_is_prey ? rlwi : rlwj;
-      BufferedRunningLengthWord<uword> &predator = i_is_prey ? rlwj : rlwi;
-      size_t index;
-
-      if (predator.getRunningBit()) {
-        index =
-            prey.dischargeCountNegated(predator.getRunningLength(), &answer);
-      } else {
-        index = prey.dischargeCount(predator.getRunningLength(), &answer);
-      }
-      if (predator.getRunningBit())
-        answer += (predator.getRunningLength() - index) * wordinbits;
-
-      predator.discardRunningWordsWithReload();
-    }
-    const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
-                                         rlwj.getNumberOfLiteralWords());
-    if (nbre_literal > 0) {
-      for (size_t k = 0; k < nbre_literal; ++k) {
-        answer += countOnes(
-            (uword)(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)));
-      }
-      rlwi.discardLiteralWordsWithReload(nbre_literal);
-      rlwj.discardLiteralWordsWithReload(nbre_literal);
-    }
-  }
-  const bool i_remains = rlwi.size() > 0;
-  BufferedRunningLengthWord<uword> &remaining = i_remains ? rlwi : rlwj;
-  answer += remaining.dischargeCount();
-  return answer;
-}
-
-template <class uword>
-void EWAHBoolArray<uword>::logicaland(const EWAHBoolArray &a,
-                                      EWAHBoolArray &container) const {
-  container.reset();
-  if (RESERVEMEMORY)
-    container.buffer.reserve(buffer.size() > a.buffer.size() ? buffer.size()
-                                                             : a.buffer.size());
-  EWAHBoolArrayRawIterator<uword> i = a.raw_iterator();
-  EWAHBoolArrayRawIterator<uword> j = raw_iterator();
-  if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
-    container.setSizeInBits(sizeInBits());
-    return;
-  }
-  // at this point, this should be safe:
-  BufferedRunningLengthWord<uword> &rlwi = i.next();
-  BufferedRunningLengthWord<uword> &rlwj = j.next();
-
-  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
-    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
-      const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
-      BufferedRunningLengthWord<uword> &prey(i_is_prey ? rlwi : rlwj);
-      BufferedRunningLengthWord<uword> &predator(i_is_prey ? rlwj : rlwi);
-      if (!predator.getRunningBit()) {
-        container.fastaddStreamOfEmptyWords(false, predator.getRunningLength());
-        prey.discardFirstWordsWithReload(predator.getRunningLength());
-      } else {
-        const size_t index =
-            prey.discharge(container, predator.getRunningLength());
-        container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() -
-                                                       index);
-      }
-      predator.discardRunningWordsWithReload();
-    }
-    const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
-                                         rlwj.getNumberOfLiteralWords());
-    if (nbre_literal > 0) {
-      for (size_t k = 0; k < nbre_literal; ++k) {
-        container.addWord(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
-      }
-      rlwi.discardLiteralWordsWithReload(nbre_literal);
-      rlwj.discardLiteralWordsWithReload(nbre_literal);
-    }
-  }
-  BufferedRunningLengthWord<uword> &remain = rlwj.size() > 0 ? rlwj : rlwi;
-  while(remain.size() > 0) {
-    container.addStreamOfEmptyWords(false, remain.size());
-    if (!remain.next()) { break; }
-  }
-  container.setSizeInBits(sizeInBits() > a.sizeInBits() ? sizeInBits()
-                                                        : a.sizeInBits());
-  container.assertWordCount("logicaland");
-}
-
-template <class uword>
-void EWAHBoolArray<uword>::logicalandnot(const EWAHBoolArray &a,
-                                         EWAHBoolArray &container) const {
-  container.reset();
-  if (RESERVEMEMORY)
-    container.buffer.reserve(buffer.size() > a.buffer.size() ? buffer.size()
-                                                             : a.buffer.size());
-  EWAHBoolArrayRawIterator<uword> i = raw_iterator();
-  EWAHBoolArrayRawIterator<uword> j = a.raw_iterator();
-  if (!j.hasNext()) {  // the other fellow is empty
-    container = *this; // just copy, stupidly, the data
-    return;
-  }
-  if (!(i.hasNext())) { // hopefully this never happens...
-    container.setSizeInBits(sizeInBits());
-    return;
-  }
-  // at this point, this should be safe:
-  BufferedRunningLengthWord<uword> &rlwi = i.next();
-  BufferedRunningLengthWord<uword> &rlwj = j.next();
-
-  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
-    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
-      const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
-      BufferedRunningLengthWord<uword> &prey(i_is_prey ? rlwi : rlwj);
-      BufferedRunningLengthWord<uword> &predator(i_is_prey ? rlwj : rlwi);
-      if (((predator.getRunningBit()) && (i_is_prey)) ||
-          ((!predator.getRunningBit()) && (!i_is_prey))) {
-        container.fastaddStreamOfEmptyWords(false, predator.getRunningLength());
-        prey.discardFirstWordsWithReload(predator.getRunningLength());
-      } else if (i_is_prey) {
-        const size_t index =
-            prey.discharge(container, predator.getRunningLength());
-        container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() -
-                                                       index);
-      } else {
-        const size_t index =
-            prey.dischargeNegated(container, predator.getRunningLength());
-        container.fastaddStreamOfEmptyWords(true, predator.getRunningLength() -
-                                                      index);
-      }
-      predator.discardRunningWordsWithReload();
-    }
-    const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
-                                         rlwj.getNumberOfLiteralWords());
-    if (nbre_literal > 0) {
-      for (size_t k = 0; k < nbre_literal; ++k) {
-        container.addWord(static_cast<uword>(rlwi.getLiteralWordAt(k) & ~rlwj.getLiteralWordAt(k)));
-      }
-      rlwi.discardLiteralWordsWithReload(nbre_literal);
-      rlwj.discardLiteralWordsWithReload(nbre_literal);
-    }
-  }
-  if(rlwi.size() > 0) {
-    rlwi.discharge(container);
-    container.setSizeInBits(sizeInBits());
-  } else {
-    while(rlwj.size() > 0) {
-      container.addStreamOfEmptyWords(false, rlwj.size());
-      if (!rlwj.next()) { break; }
-    }
-    container.setSizeInBits(a.sizeInBits());
-  }
-  container.assertWordCount("logicalandnot");
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::logicalandnotcount(const EWAHBoolArray &a) const {
-  EWAHBoolArrayRawIterator<uword> i = raw_iterator();
-  EWAHBoolArrayRawIterator<uword> j = a.raw_iterator();
-  if (!j.hasNext()) { // the other fellow is empty
-    return this->numberOfOnes();
-  }
-  if (!(i.hasNext())) { // hopefully this never happens...
-    return 0;
-  }
-  size_t answer = 0;
-  // at this point, this should be safe:
-  BufferedRunningLengthWord<uword> &rlwi = i.next();
-  BufferedRunningLengthWord<uword> &rlwj = j.next();
-
-  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
-    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
-      const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
-      BufferedRunningLengthWord<uword> &prey(i_is_prey ? rlwi : rlwj);
-      BufferedRunningLengthWord<uword> &predator(i_is_prey ? rlwj : rlwi);
-      if (((predator.getRunningBit()) && (i_is_prey)) ||
-          ((!predator.getRunningBit()) && (!i_is_prey))) {
-        prey.discardFirstWordsWithReload(predator.getRunningLength());
-      } else if (i_is_prey) {
-        prey.dischargeCount(predator.getRunningLength(), &answer);
-      } else {
-        const size_t index =
-            prey.dischargeCountNegated(predator.getRunningLength(), &answer);
-        answer += (predator.getRunningLength() - index) * wordinbits;
-      }
-      predator.discardRunningWordsWithReload();
-    }
-    const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
-                                         rlwj.getNumberOfLiteralWords());
-    if (nbre_literal > 0) {
-      for (size_t k = 0; k < nbre_literal; ++k) {
-        answer += countOnes(
-            (uword)(rlwi.getLiteralWordAt(k) & (~rlwj.getLiteralWordAt(k))));
-      }
-      rlwi.discardLiteralWordsWithReload(nbre_literal);
-      rlwj.discardLiteralWordsWithReload(nbre_literal);
-    }
-  }
-  const bool i_remains = rlwi.size() > 0;
-  if (i_remains) {
-    answer += rlwi.dischargeCount();
-  }
-  return answer;
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::logicalandcount(const EWAHBoolArray &a) const {
-  EWAHBoolArrayRawIterator<uword> i = a.raw_iterator();
-  EWAHBoolArrayRawIterator<uword> j = raw_iterator();
-  if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
-    return 0;
-  }
-  size_t answer = 0;
-  // at this point, this should be safe:
-  BufferedRunningLengthWord<uword> &rlwi = i.next();
-  BufferedRunningLengthWord<uword> &rlwj = j.next();
-
-  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
-    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
-      const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
-      BufferedRunningLengthWord<uword> &prey(i_is_prey ? rlwi : rlwj);
-      BufferedRunningLengthWord<uword> &predator(i_is_prey ? rlwj : rlwi);
-      if (!predator.getRunningBit()) {
-        prey.discardFirstWordsWithReload(predator.getRunningLength());
-      } else {
-        // const size_t index =
-        prey.dischargeCount(predator.getRunningLength(), &answer);
-      }
-      predator.discardRunningWordsWithReload();
-    }
-    const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
-                                         rlwj.getNumberOfLiteralWords());
-    if (nbre_literal > 0) {
-      for (size_t k = 0; k < nbre_literal; ++k) {
-        answer += countOnes(
-            (uword)(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)));
-      }
-      rlwi.discardLiteralWordsWithReload(nbre_literal);
-      rlwj.discardLiteralWordsWithReload(nbre_literal);
-    }
-  }
-  return answer;
-}
-
-template <class uword>
-bool EWAHBoolArray<uword>::intersects(const EWAHBoolArray &a) const {
-  EWAHBoolArrayRawIterator<uword> i = a.raw_iterator();
-  EWAHBoolArrayRawIterator<uword> j = raw_iterator();
-  if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
-    return false;
-  }
-  // at this point, this should be safe:
-  BufferedRunningLengthWord<uword> &rlwi = i.next();
-  BufferedRunningLengthWord<uword> &rlwj = j.next();
-
-  while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
-    while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
-      const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
-      BufferedRunningLengthWord<uword> &prey(i_is_prey ? rlwi : rlwj);
-      BufferedRunningLengthWord<uword> &predator(i_is_prey ? rlwj : rlwi);
-      if (!predator.getRunningBit()) {
-        prey.discardFirstWordsWithReload(predator.getRunningLength());
-      } else {
-        size_t index = 0;
-        bool isnonzero =
-            prey.nonzero_discharge(predator.getRunningLength(), index);
-        if (isnonzero)
-          return true;
-      }
-      predator.discardRunningWordsWithReload();
-    }
-    const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
-                                         rlwj.getNumberOfLiteralWords());
-    if (nbre_literal > 0) {
-      for (size_t k = 0; k < nbre_literal; ++k) {
-        if ((rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)) != 0)
-          return true;
-      }
-      rlwi.discardLiteralWordsWithReload(nbre_literal);
-      rlwj.discardLiteralWordsWithReload(nbre_literal);
-    }
-  }
-  return false;
-}
-
-template <class uword>
-BitmapStatistics EWAHBoolArray<uword>::computeStatistics() const {
-  BitmapStatistics bs;
-  EWAHBoolArrayRawIterator<uword> i = raw_iterator();
-  while (i.hasNext()) {
-    BufferedRunningLengthWord<uword> &brlw(i.next());
-    ++bs.runningwordmarker;
-    bs.totalliteral += brlw.getNumberOfLiteralWords();
-    bs.totalcompressed += brlw.getRunningLength();
-    if (brlw.getRunningLength() ==
-        RunningLengthWord<uword>::largestrunninglengthcount) {
-      ++bs.maximumofrunningcounterreached;
-    }
-  }
-  return bs;
-}
-
-template <class uword> void EWAHBoolArray<uword>::debugprintout() const {
-  std::cout << "==printing out EWAHBoolArray==" << std::endl;
-  std::cout << "Number of compressed words: " << buffer.size() << std::endl;
-  std::cout << "Size in bits: " << sizeinbits << std::endl;
-
-  size_t pointer = 0;
-  while (pointer < buffer.size()) {
-    ConstRunningLengthWord<uword> rlw(buffer[pointer]);
-    bool b = rlw.getRunningBit();
-    const uword rl = rlw.getRunningLength();
-    const uword lw = rlw.getNumberOfLiteralWords();
-    std::cout << "pointer = " << pointer << " running bit=" << b
-              << " running length=" << rl << " lit. words=" << lw << std::endl;
-    for (uword j = 0; j < lw; ++j) {
-      const uword &w = buffer[pointer + j + 1];
-      std::cout << toBinaryString(w) << std::endl;
-    }
-    pointer += lw + 1;
-  }
-  std::cout << "==END==" << std::endl;
-}
-
-template <class uword>
-size_t EWAHBoolArray<uword>::sizeOnDisk(const bool savesizeinbits) const {
-  return (savesizeinbits ? sizeof(uint64_t) : 0) + sizeof(uint64_t) +
-         sizeof(uword) * buffer.size();
-}
-} // namespace ewah
-#endif
diff --git a/yt/utilities/lib/ewahboolarray/ewah.h b/yt/utilities/lib/ewahboolarray/ewah.h
deleted file mode 100644
index d2678a5a668..00000000000
--- a/yt/utilities/lib/ewahboolarray/ewah.h
+++ /dev/null
@@ -1,712 +0,0 @@
-/**
- * This code is released under the
- * Apache License Version 2.0 http://www.apache.org/licenses/.
- *
- * (c) Daniel Lemire, http://lemire.me/en/
- *     with contributions from Zarian Waheed and others.
- */
-
-#ifndef EWAH_H
-#define EWAH_H
-
-#include <algorithm>
-#include <queue>
-#include <vector>
-
-#include "boolarray.h"
-#include "ewahutil.h"
-
-#include "runninglengthword.h"
-
-namespace ewah {
-
-template <class uword> class EWAHBoolArrayIterator;
-
-template <class uword> class EWAHBoolArraySetBitForwardIterator;
-
-class BitmapStatistics;
-
-template <class uword> class EWAHBoolArrayRawIterator;
-
-/**
- * This class is a compressed bitmap.
- * This is where compression
- * happens.
- * The underlying data structure is an STL vector.
- */
-template <class uword = uint32_t> class EWAHBoolArray {
-public:
-  EWAHBoolArray() : buffer(1, 0), sizeinbits(0), lastRLW(0) {}
-
-  static EWAHBoolArray bitmapOf(size_t n, ...) {
-    EWAHBoolArray ans;
-    va_list vl;
-    va_start(vl, n);
-    for (size_t i = 0; i < n; i++) {
-      ans.set(static_cast<size_t>(va_arg(vl, int)));
-    }
-    va_end(vl);
-    return ans;
-  }
-
-  /**
-   * Recover wasted memory usage. Fit buffers to the actual data.
-   */
-  void trim() { buffer.shrink_to_fit(); }
-
-  /**
-   * Query the value of bit i. This runs in time proportional to
-   * the size of the bitmap. This is not meant to be use in
-   * a performance-sensitive context.
-   *
-   *  (This implementation is based on zhenjl's Go version of JavaEWAH.)
-   *
-   */
-  bool get(const size_t pos) const {
-    if (pos >= static_cast<size_t>(sizeinbits))
-      return false;
-    const size_t wordpos = pos / wordinbits;
-    size_t WordChecked = 0;
-    EWAHBoolArrayRawIterator<uword> j = raw_iterator();
-    while (j.hasNext()) {
-      BufferedRunningLengthWord<uword> &rle = j.next();
-      WordChecked += static_cast<size_t>(rle.getRunningLength());
-      if (wordpos < WordChecked)
-        return rle.getRunningBit();
-      if (wordpos < WordChecked + rle.getNumberOfLiteralWords()) {
-        const uword w = j.dirtyWords()[wordpos - WordChecked];
-        return (w & (static_cast<uword>(1) << (pos % wordinbits))) != 0;
-      }
-      WordChecked += static_cast<size_t>(rle.getNumberOfLiteralWords());
-    }
-    return false;
-  }
-
-  /**
-   * Returns true if no bit is set.
-   */
-  bool empty() const {
-    size_t pointer(0);
-    while (pointer < buffer.size()) {
-      ConstRunningLengthWord<uword> rlw(buffer[pointer]);
-      if (rlw.getRunningBit()) {
-        if (rlw.getRunningLength() > 0)
-          return false;
-      }
-      ++pointer;
-      for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) {
-        if (buffer[pointer] != 0)
-          return false;
-        ++pointer;
-      }
-    }
-    return true;
-  }
-
-  /**
-   * Set the ith bit to true (starting at zero).
-   * Auto-expands the bitmap. It has constant running time complexity.
-   * Note that you must set the bits in increasing order:
-   * set(1), set(2) is ok; set(2), set(1) is not ok.
-   * set(100), set(100) is also not ok.
-   *
-   * Note: by design EWAH is not an updatable data structure in
-   * the sense that once bit 1000 is set, you cannot change the value
-   * of bits 0 to 1000.
-   *
-   * Returns true if the value of the bit was changed, and false otherwise.
-   * (In practice, if you set the bits in strictly increasing order, it
-   * should always return true.)
-   */
-  bool set(size_t i);
-
-  /**
-   * Transform into a string that presents a list of set bits.
-   * The running time is linear in the compressed size of the bitmap.
-   */
-  operator std::string() const {
-    std::stringstream ss;
-    ss << *this;
-    return ss.str();
-  }
-  friend std::ostream &operator<<(std::ostream &out, const EWAHBoolArray &a) {
-
-    out << "{";
-    for (EWAHBoolArray::const_iterator i = a.begin(); i != a.end();) {
-      out << *i;
-      ++i;
-      if (i != a.end())
-        out << ",";
-    }
-    out << "}";
-
-    return out;
-  }
-  /**
-   * Make sure the two bitmaps have the same size (padding with zeroes
-   * if necessary). It has constant running time complexity.
-   *
-   * This is useful when calling "logicalnot" functions.
-   *
-   * This can an adverse effect of performance, especially when computing
-   * intersections.
-   */
-  void makeSameSize(EWAHBoolArray &a) {
-    if (a.sizeinbits < sizeinbits)
-      a.padWithZeroes(sizeinbits);
-    else if (sizeinbits < a.sizeinbits)
-      padWithZeroes(a.sizeinbits);
-  }
-
-  enum { RESERVEMEMORY = true }; // for speed
-
-  typedef EWAHBoolArraySetBitForwardIterator<uword> const_iterator;
-
-  /**
-   * Returns an iterator that can be used to access the position of the
-   * set bits. The running time complexity of a full scan is proportional to the
-   * number
-   * of set bits: be aware that if you have long strings of 1s, this can be
-   * very inefficient.
-   *
-   * It can be much faster to use the toArray method if you want to
-   * retrieve the set bits.
-   */
-  const_iterator begin() const {
-    return EWAHBoolArraySetBitForwardIterator<uword>(&buffer);
-  }
-
-  /**
-   * Basically a bogus iterator that can be used together with begin()
-   * for constructions such as for(EWAHBoolArray<uword>::iterator i = b.begin();
-   * i!=b.end(); ++i) {}
-   */
-  const_iterator &end() const {
-    return EWAHBoolArraySetBitForwardIterator<uword>::end();
-  }
-
-  /**
-   * Retrieve the set bits. Can be much faster than iterating through
-   * the set bits with an iterator.
-   */
-  std::vector<size_t> toArray() const;
-
-  /**
-   * computes the logical and with another compressed bitmap
-   * answer goes into container
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   *
-   * The sizeInBits() of the result is equal to the maximum that of the current
-   * bitmap's sizeInBits() and that of a.sizeInBits().
-   */
-  void logicaland(const EWAHBoolArray &a, EWAHBoolArray &container) const;
-
-  /**
-   * computes the logical and with another compressed bitmap
-   * Return the answer
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   *
-   * The sizeInBits() of the result is equal to the maximum that of the current
-   * bitmap's sizeInBits() and that of a.sizeInBits().
-   */
-  EWAHBoolArray logicaland(const EWAHBoolArray &a) const {
-    EWAHBoolArray answer;
-    logicaland(a, answer);
-    return answer;
-  }
-
-  /**
-   * calls logicaland
-   */
-  EWAHBoolArray operator&(const EWAHBoolArray &a) const {
-    return logicaland(a);
-  }
-
-  /**
-   * computes the logical and with another compressed bitmap
-   * answer goes into container
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   *
-   * The sizeInBits() of the result should be equal to that of the current
-   * bitmap irrespective of a.sizeInBits().
-   *
-   */
-  void logicalandnot(const EWAHBoolArray &a, EWAHBoolArray &container) const;
-
-  /**
-   * calls logicalandnot
-   */
-  EWAHBoolArray operator-(const EWAHBoolArray &a) const {
-    return logicalandnot(a);
-  }
-
-  /**
-   * computes the logical and not with another compressed bitmap
-   * Return the answer
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   *
-   * The sizeInBits() of the result should be equal to that of the current
-   * bitmap irrespective of a.sizeInBits().
-   *
-   */
-  EWAHBoolArray logicalandnot(const EWAHBoolArray &a) const {
-    EWAHBoolArray answer;
-    logicalandnot(a, answer);
-    return answer;
-  }
-
-  /**
-   * tests whether the bitmaps "intersect" (have at least one 1-bit at the same
-   * position). This function does not modify the existing bitmaps.
-   * It is faster than calling logicaland.
-   */
-  bool intersects(const EWAHBoolArray &a) const;
-
-  /**
-   * computes the logical or with another compressed bitmap
-   * answer goes into container
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   *
-   * If you have many bitmaps, see fast_logicalor_tocontainer.
-   *
-   * The sizeInBits() of the result is equal to the maximum that of the current
-   * bitmap's sizeInBits() and that of a.sizeInBits().
-   */
-  void logicalor(const EWAHBoolArray &a, EWAHBoolArray &container) const;
-
-  /**
-   * computes the size (in number of set bits) of the logical or with another
-   * compressed bitmap
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   */
-  size_t logicalorcount(const EWAHBoolArray &a) const;
-
-  /**
-   * computes the size (in number of set bits) of the logical and with another
-   * compressed bitmap
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   */
-  size_t logicalandcount(const EWAHBoolArray &a) const;
-
-  /**
-   * computes the size (in number of set bits) of the logical and not with
-   * another compressed bitmap
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   */
-  size_t logicalandnotcount(const EWAHBoolArray &a) const;
-
-  /**
-   * computes the size (in number of set bits) of the logical xor with another
-   * compressed bitmap
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   */
-  size_t logicalxorcount(const EWAHBoolArray &a) const;
-
-  /**
-   * computes the logical or with another compressed bitmap
-   * Return the answer
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   *
-   * If you have many bitmaps, see fast_logicalor.
-   *
-   * The sizeInBits() of the result is equal to the maximum that of the current
-   * bitmap's sizeInBits() and that of a.sizeInBits().
-   */
-  EWAHBoolArray logicalor(const EWAHBoolArray &a) const {
-    EWAHBoolArray answer;
-    logicalor(a, answer);
-    return answer;
-  }
-
-  /**
-   * calls logicalor
-   */
-  EWAHBoolArray operator|(const EWAHBoolArray &a) const { return logicalor(a); }
-
-  /**
-   * computes the logical xor with another compressed bitmap
-   * answer goes into container
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   *
-   * The sizeInBits() of the result is equal to the maximum that of the current
-   * bitmap's sizeInBits() and that of a.sizeInBits().
-   */
-  void logicalxor(const EWAHBoolArray &a, EWAHBoolArray &container) const;
-
-  /**
-   * computes the logical xor with another compressed bitmap
-   * Return the answer
-   * Running time complexity is proportional to the sum of the compressed
-   * bitmap sizes.
-   *
-   * The sizeInBits() of the result is equal to the maximum that of the current
-   * bitmap's sizeInBits() and that of a.sizeInBits().
-   */
-  EWAHBoolArray logicalxor(const EWAHBoolArray &a) const {
-    EWAHBoolArray answer;
-    logicalxor(a, answer);
-    return answer;
-  }
-
-  /**
-   * calls logicalxor
-   */
-  EWAHBoolArray operator^(const EWAHBoolArray &a) const {
-    return logicalxor(a);
-  }
-  /**
-   * clear the content of the bitmap. It does not
-   * release the memory.
-   */
-  void reset() {
-    buffer.clear();
-    buffer.push_back(0);
-    sizeinbits = 0;
-    lastRLW = 0;
-  }
-
-  /**
-   * convenience method.
-   *
-   * returns the number of words added (storage cost increase)
-   */
-  inline size_t addWord(const uword newdata,
-                        const uint32_t bitsthatmatter = 8 * sizeof(uword));
-
-  inline void printout(std::ostream &o = std::cout) {
-    toBoolArray().printout(o);
-  }
-
-  /**
-   * Prints a verbose description of the content of the compressed bitmap.
-   */
-  void debugprintout() const;
-
-  /**
-   * Return the size in bits of this bitmap (this refers
-   * to the uncompressed size in bits).
-   *
-   * You can increase it with padWithZeroes()
-   */
-  inline size_t sizeInBits() const { return sizeinbits; }
-
-  /**
-   * Return the size of the buffer in bytes. This
-   * is equivalent to the storage cost, minus some overhead.
-   * See sizeOnDisk to get the actual storage cost with overhead.
-   */
-  inline size_t sizeInBytes() const { return buffer.size() * sizeof(uword); }
-
-  /**
-   * same as addEmptyWord, but you can do several in one shot!
-   * returns the number of words added (storage cost increase)
-   */
-  size_t addStreamOfEmptyWords(const bool v, size_t number);
-
-  /**
-   * add a stream of dirty words, returns the number of words added
-   * (storage cost increase)
-   */
-  size_t addStreamOfDirtyWords(const uword *v, const size_t number);
-
-  /**
-   * add a stream of dirty words, each one negated, returns the number of words
-   * added
-   * (storage cost increase)
-   */
-  size_t addStreamOfNegatedDirtyWords(const uword *v, const size_t number);
-
-  /**
-   * make sure the size of the array is totalbits bits by padding with zeroes.
-   * returns the number of words added (storage cost increase).
-   *
-   * This is useful when calling "logicalnot" functions.
-   *
-   * This can an adverse effect of performance, especially when computing
-   * intersections.
-   *
-   */
-  size_t padWithZeroes(const size_t totalbits);
-
-  /**
-   * Compute the size on disk assuming that it was saved using
-   * the method "write".
-   */
-  size_t sizeOnDisk(const bool savesizeinbits = true) const;
-
-  /**
-   * Save this bitmap to a stream. The file format is
-   * | sizeinbits | buffer length | buffer content|
-   * the sizeinbits part can be omitted if "savesizeinbits=false".
-   * Both sizeinbits and buffer length are saved using the uint64_t data
-   * type.
-   * Returns how many bytes were handed out to the stream.
-   */
-  size_t write(std::ostream &out, const bool savesizeinbits = true) const;
-
-  /**
-   * same as write(std::ostream...), except that you provide a char pointer
-   * and a "capacity" (in bytes). The function never writes at or beyond
-   * "out+capacity". If the storage needed exceeds the given capacity, the value
-   * zero is returned: it should be considered an error. Otherwise, the number
-   * of bytes copied is returned.
-   */
-  size_t write(char *out, size_t capacity,
-               const bool savesizeinbits = true) const;
-
-  /**
-   * This only writes the content of the buffer (see write()) method.
-   * It is for advanced users.
-   */
-  void writeBuffer(std::ostream &out) const;
-
-  /**
-   * size (in words) of the underlying STL vector.
-   */
-  size_t bufferSize() const { return buffer.size(); }
-
-  /**
-   * this is the counterpart to the write method.
-   * if you set savesizeinbits=false, then you are responsible
-   * for setting the value of the attribute sizeinbits (see method
-   * setSizeInBits).
-   *
-   * Returns how many bytes were queried from the stream.
-   */
-  size_t read(std::istream &in, const bool savesizeinbits = true);
-
-  /**
-   * same as read(std::istream...), except that you provide a char pointer
-   * and a "capacity" (in bytes). The function never reads at or beyond
-   * "in+capacity". If the detected storage exceeds the  given capacity, the
-   * value zero is returned: it should be considered an error. Otherwise, the
-   * number of bytes read is returned.
-   */
-  size_t read(const char *in, size_t capacity,
-              const bool savesizeinbits = true);
-
-  /**
-   * read the buffer from a stream, see method writeBuffer.
-   * this is for advanced users.
-   */
-  void readBuffer(std::istream &in, const size_t buffersize);
-
-  /**
-   * We define two EWAHBoolArray as being equal if they have the same set bits.
-   * Alternatively, B1==B2 if and only if cardinality(B1 XOR B2) ==0.
-   */
-  bool operator==(const EWAHBoolArray &x) const;
-
-  /**
-   * We define two EWAHBoolArray as being different if they do not have the same
-   * set bits.
-   * Alternatively, B1!=B2 if and only if cardinality(B1 XOR B2) >0.
-   */
-  bool operator!=(const EWAHBoolArray &x) const;
-
-  bool operator==(const BoolArray<uword> &x) const;
-
-  bool operator!=(const BoolArray<uword> &x) const;
-
-  /**
-   * Iterate over the uncompressed words.
-   * Can be considerably faster than begin()/end().
-   * Running time complexity of a full scan is proportional to the
-   * uncompressed size of the bitmap.
-   */
-  EWAHBoolArrayIterator<uword> uncompress() const;
-
-  /**
-   * To iterate over the compressed data.
-   * Can be faster than any other iterator.
-   * Running time complexity of a full scan is proportional to the
-   * compressed size of the bitmap.
-   */
-  EWAHBoolArrayRawIterator<uword> raw_iterator() const;
-
-  /**
-   * Appends the content of some other compressed bitmap
-   * at the end of the current bitmap.
-   */
-  void append(const EWAHBoolArray &x);
-
-  /**
-   * For research purposes. This computes the number of
-   * dirty words and the number of compressed words.
-   */
-  BitmapStatistics computeStatistics() const;
-
-  /**
-   * For convenience, this fully uncompresses the bitmap.
-   * Not fast!
-   */
-  BoolArray<uword> toBoolArray() const;
-
-  /**
-   * Convert to a list of positions of "set" bits.
-   * The recommended container is vector<size_t>.
-   *
-   * See also toArray().
-   */
-  template <class container>
-  void appendSetBits(container &out, const size_t offset = 0) const;
-
-  /**
-   * Returns a vector containing the position of the set
-   * bits in increasing order. This just calls "toArray".
-   */
-  std::vector<size_t> toVector() const { return toArray(); }
-
-  /**
-   * Returns the number of bits set to the value 1.
-   * The running time complexity is proportional to the
-   * compressed size of the bitmap.
-   *
-   * This is sometimes called the cardinality.
-   */
-  size_t numberOfOnes() const;
-
-  /**
-   * Swap the content of this bitmap with another bitmap.
-   * No copying is done. (Running time complexity is constant.)
-   */
-  void swap(EWAHBoolArray &x);
-
-  const std::vector<uword> &getBuffer() const { return buffer; }
-
-  enum { wordinbits = sizeof(uword) * 8 };
-
-  /**
-   * Please don't copy your bitmaps! The running time
-   * complexity of a copy is the size of the compressed bitmap.
-   **/
-  EWAHBoolArray(const EWAHBoolArray &other)
-      : buffer(other.buffer), sizeinbits(other.sizeinbits),
-        lastRLW(other.lastRLW) {}
-
-  /**
-   * Copies the content of one bitmap onto another. Running time complexity
-   * is proportional to the size of the compressed bitmap.
-   * please, never hard-copy this object. Use the swap method if you must.
-   */
-  EWAHBoolArray &operator=(const EWAHBoolArray &x) {
-    buffer = x.buffer;
-    sizeinbits = x.sizeinbits;
-    lastRLW = x.lastRLW;
-    return *this;
-  }
-
-  /**
-   * Move constructor.
-   */
-  EWAHBoolArray(EWAHBoolArray &&other)
-      : buffer(std::move(other.buffer)), sizeinbits(other.sizeinbits),
-        lastRLW(other.lastRLW) {}
-
-  /**
-   * Move assignment operator.
-   */
-  EWAHBoolArray &operator=(EWAHBoolArray &&x) {
-    buffer = std::move(x.buffer);
-    sizeinbits = x.sizeinbits;
-    lastRLW = x.lastRLW;
-    return *this;
-  }
-
-  /**
-   * This is equivalent to the operator =. It is used
-   * to keep in mind that assignment can be expensive.
-   *
-   *if you don't care to copy the bitmap (performance-wise), use this!
-   */
-  void expensive_copy(const EWAHBoolArray &x) {
-    buffer = x.buffer;
-    sizeinbits = x.sizeinbits;
-    lastRLW = x.lastRLW;
-  }
-
-  /**
-   * Write the logical not of this bitmap in the provided container.
-   *
-   * This function takes into account the sizeInBits value.
-   * You may need to call "padWithZeroes" to adjust the sizeInBits.
-   */
-  void logicalnot(EWAHBoolArray &x) const;
-
-  /**
-   * Write the logical not of this bitmap in the provided container.
-   *
-   * This function takes into account the sizeInBits value.
-   * You may need to call "padWithZeroes" to adjust the sizeInBits.
-   */
-  EWAHBoolArray<uword> logicalnot() const {
-    EWAHBoolArray answer;
-    logicalnot(answer);
-    return answer;
-  }
-
-  /**
-   * Apply the logical not operation on this bitmap.
-   * Running time complexity is proportional to the compressed size of the
-   *bitmap.
-   * The current bitmap is not modified.
-   *
-   * This function takes into account the sizeInBits value.
-   * You may need to call "padWithZeroes" to adjust the sizeInBits.
-   **/
-  void inplace_logicalnot();
-
-  /**
-   * set size in bits. This does not affect the compressed size. It
-   * runs in constant time. This should not normally be used, except
-   * as part of a deserialization process.
-   */
-  inline void setSizeInBits(const size_t size) { sizeinbits = size; }
-
-  /**
-   * Like addStreamOfEmptyWords but
-   * addStreamOfEmptyWords but does not return the cost increase,
-   * does not update sizeinbits
-   */
-  inline void fastaddStreamOfEmptyWords(const bool v, size_t number);
-  /**
-   * LikeaddStreamOfDirtyWords but does not return the cost increase,
-   * does not update sizeinbits.
-   */
-  inline void fastaddStreamOfDirtyWords(const uword *v, const size_t number);
-
-private:
-  void assertWordCount(std::string message) const;
-  void correctWordCount();
-  size_t numberOfWords() const;
-  // private because does not increment the size in bits
-  // returns the number of words added (storage cost increase)
-  inline size_t addLiteralWord(const uword newdata);
-
-  // private because does not increment the size in bits
-  // returns the number of words added (storage cost increase)
-  size_t addEmptyWord(const bool v);
-  // this second version "might" be faster if you hate OOP.
-  // in my tests, it turned out to be slower!
-  // private because does not increment the size in bits
-  // inline void addEmptyWordStaticCalls(bool v);
-
-  std::vector<uword> buffer;
-  size_t sizeinbits;
-  size_t lastRLW;
-};
-} // namespace ewah
-#include "ewah-inl.h"
-
-#endif
diff --git a/yt/utilities/lib/ewahboolarray/ewahutil.h b/yt/utilities/lib/ewahboolarray/ewahutil.h
deleted file mode 100644
index 1ab35b27f0b..00000000000
--- a/yt/utilities/lib/ewahboolarray/ewahutil.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/**
- * This code is released under the
- * Apache License Version 2.0 http://www.apache.org/licenses/.
- *
- * (c) Daniel Lemire, http://lemire.me/en/
- *
- * Some code from the public domain tuklib.
- */
-
-#ifndef EWAHUTIL_H
-#define EWAHUTIL_H
-
-#include <iso646.h> // mostly for Microsoft compilers
-#include <limits.h>
-#include <stdint.h> // part of Visual Studio 2010 and better
-#include <stdlib.h>
-#include <string.h>
-
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <iostream>
-#include <sstream>
-#include <stdexcept>
-#include <string>
-#include <vector>
-
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-
-
-#if ((ULONG_MAX) == (UINT_MAX))
-#define UWORD uint32_t
-#else
-#define UWORD uint64_t
-#endif
-
-namespace ewah {
-
-static inline uint32_t ctz64(uint64_t n) {
-#if defined(__GNUC__) && UINT_MAX >= UINT32_MAX && ULLONG_MAX >= UINT64_MAX
-  return static_cast<uint32_t>(__builtin_ctzll(n));
-#elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 &&              \
-    ULONG_MAX >= UINT64_MAX
-  uint32_t i;
-  _BitScanForward64((unsigned long *)&i, n);
-  return i;
-#else
-  uint32_t i = 1;
-  if ((n & static_cast<uint64_t>(4294967295)) == 0) {
-    n >>= 32;
-    i += 32;
-  }
-  if ((n & static_cast<uint64_t>(0x0000FFFFUL)) == 0) {
-    n >>= 16;
-    i += 16;
-  }
-
-  if ((n & static_cast<uint64_t>(0x000000FFUL)) == 0) {
-    n >>= 8;
-    i += 8;
-  }
-
-  if ((n & static_cast<uint64_t>(0x0000000FUL)) == 0) {
-    n >>= 4;
-    i += 4;
-  }
-
-  if ((n & static_cast<uint64_t>(0x00000003UL)) == 0) {
-    n >>= 2;
-    i += 2;
-  }
-  i -= (n & 0x1);
-  return i;
-#endif
-}
-
-static inline uint32_t ctz32(uint32_t n) {
-#if defined(__GNUC__) && UINT_MAX >= UINT32_MAX
-  return static_cast<uint32_t>(__builtin_ctz(n));
-
-#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-  uint32_t i;
-  __asm__("bsfl %1, %0" : "=r"(i) : "rm"(n));
-  return i;
-
-#elif defined(_MSC_VER) && _MSC_VER >= 1400
-  uint32_t i;
-  _BitScanForward((unsigned long *)&i, n);
-  return i;
-
-#else
-  uint32_t i = 1;
-
-  if ((n & static_cast<uint32_t>(0x0000FFFF)) == 0) {
-    n >>= 16;
-    i += 16;
-  }
-
-  if ((n & static_cast<uint32_t>(0x000000FF)) == 0) {
-    n >>= 8;
-    i += 8;
-  }
-
-  if ((n & static_cast<uint32_t>(0x0000000F)) == 0) {
-    n >>= 4;
-    i += 4;
-  }
-
-  if ((n & static_cast<uint32_t>(0x00000003)) == 0) {
-    n >>= 2;
-    i += 2;
-  }
-
-  i -= (n & 1);
-
-  return i;
-#endif
-}
-
-static inline uint32_t ctz16(uint16_t n) {
-#if defined(__GNUC__) && UINT_MAX >= UINT32_MAX
-  return static_cast<uint32_t>(__builtin_ctz(n));
-
-#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-  uint32_t i;
-  __asm__("bsfl %1, %0" : "=r"(i) : "rm"(n));
-  return i;
-
-#elif defined(_MSC_VER) && _MSC_VER >= 1400
-  uint32_t i;
-  _BitScanForward((unsigned long *)&i, n);
-  return i;
-
-#else
-  uint32_t i = 1;
-
-  if ((n & static_cast<uint16_t>(0x000000FF)) == 0) {
-    n >>= 8;
-    i += 8;
-  }
-
-  if ((n & static_cast<uint16_t>(0x0000000F)) == 0) {
-    n >>= 4;
-    i += 4;
-  }
-
-  if ((n & static_cast<uint16_t>(0x00000003)) == 0) {
-    n >>= 2;
-    i += 2;
-  }
-  i -= (n & 1);
-
-  return i;
-#endif
-}
-
-#ifdef __GNUC__
-/**
- * count the number of bits set to one (32 bit version)
- */
-inline uint32_t countOnes(uint32_t x) {
-  return static_cast<uint32_t>(__builtin_popcount(x));
-}
-#elif defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(_M_ARM)&& !defined(_M_ARM64)
-inline uint32_t countOnes(uint32_t x) { return __popcnt(x); }
-#else
-inline uint32_t countOnes(uint32_t v) {
-  v = v - ((v >> 1) & 0x55555555);
-  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
-  return static_cast<uint32_t>((((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >>
-                               24);
-}
-#endif
-
-#ifdef __GNUC__
-/**
- * count the number of bits set to one (64 bit version)
- */
-inline uint32_t countOnes(uint64_t x) {
-  return static_cast<uint32_t>(__builtin_popcountll(x));
-}
-#elif defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(_M_ARM64)
-inline uint32_t countOnes(uint64_t x) {
-  return static_cast<uint32_t>(__popcnt64(static_cast<__int64>(x)));
-}
-#else
-inline uint32_t countOnes(uint64_t v) {
-  v = v - ((v >> 1) & 0x5555555555555555);
-  v = (v & 0x3333333333333333) + ((v >> 2) & 0x3333333333333333);
-  v = ((v + (v >> 4)) & 0x0F0F0F0F0F0F0F0F);
-  return static_cast<uint32_t>((v * (0x0101010101010101)) >> 56);
-}
-#endif
-
-inline uint32_t countOnes(uint16_t v) {
-  return countOnes(static_cast<uint32_t>(v));
-}
-
-inline uint32_t numberOfTrailingZeros(uint32_t x) {
-  if (x == 0)
-    return 32;
-  return ctz32(x);
-}
-
-inline uint32_t numberOfTrailingZeros(uint64_t x) {
-  if (x == 0)
-    return 64;
-  return ctz64(x);
-}
-
-inline uint32_t numberOfTrailingZeros(uint16_t x) {
-  if (x == 0)
-    return 16;
-  return ctz16(x);
-}
-
-/**
- * Returns the binary representation of a binary word.
- */
-template <class uword> std::string toBinaryString(const uword w) {
-  std::ostringstream convert;
-  for (uint32_t k = 0; k < sizeof(uword) * 8; ++k) {
-    if (w & (static_cast<uword>(1) << k))
-      convert << "1";
-    else
-      convert << "0";
-  }
-  return convert.str();
-}
-} // namespace ewah
-#endif
diff --git a/yt/utilities/lib/ewahboolarray/runninglengthword.h b/yt/utilities/lib/ewahboolarray/runninglengthword.h
deleted file mode 100644
index 405427b06a7..00000000000
--- a/yt/utilities/lib/ewahboolarray/runninglengthword.h
+++ /dev/null
@@ -1,551 +0,0 @@
-/**
- * This code is released under the
- * Apache License Version 2.0 http://www.apache.org/licenses/.
- *
- * (c) Daniel Lemire, http://lemire.me/en/
- */
-#ifndef RUNNINGLENGTHWORD_H_
-#define RUNNINGLENGTHWORD_H_
-#include <iostream>
-namespace ewah {
-
-/**
- * For expert users.
- * This class is used to represent a special type of word storing
- * a run length. It is defined by the Enhanced Word Aligned  Hybrid (EWAH)
- * format. You don't normally need to access this class.
- */
-template <class uword> class RunningLengthWord {
-public:
-  RunningLengthWord(uword &data) : mydata(data) {}
-
-  RunningLengthWord(const RunningLengthWord &rlw) : mydata(rlw.mydata) {}
-
-  RunningLengthWord &operator=(const RunningLengthWord &rlw) {
-    mydata = rlw.mydata;
-    return *this;
-  }
-
-  /**
-   * Which bit is being repeated?
-   */
-  bool getRunningBit() const { return mydata & static_cast<uword>(1); }
-
-  /**
-   * how many words should be filled by the running bit
-   */
-  static inline bool getRunningBit(uword data) {
-    return data & static_cast<uword>(1);
-  }
-
-  /**
-   * how many words should be filled by the running bit
-   */
-  uword getRunningLength() const {
-    return static_cast<uword>((mydata >> 1) & largestrunninglengthcount);
-  }
-
-  /**
-   * followed by how many literal words?
-   */
-  static inline uword getRunningLength(uword data) {
-    return static_cast<uword>((data >> 1) & largestrunninglengthcount);
-  }
-
-  /**
-   * followed by how many literal words?
-   */
-  uword getNumberOfLiteralWords() const {
-    return static_cast<uword>(mydata >> (1 + runninglengthbits));
-  }
-
-  /**
-   * Total of getRunningLength() and getNumberOfLiteralWords()
-   */
-  uword size() const {
-    return static_cast<uword>(getRunningLength() + getNumberOfLiteralWords());
-  }
-
-  /**
-   * Total of getRunningLength() and getNumberOfLiteralWords()
-   */
-  static inline uword size(uword data) {
-    return static_cast<uword>(getRunningLength(data) +
-                              getNumberOfLiteralWords(data));
-  }
-
-  /**
-   * followed by how many literal words?
-   */
-  static inline uword getNumberOfLiteralWords(uword data) {
-    return static_cast<uword>(data >> (1 + runninglengthbits));
-  }
-
-  /**
-   * running length of which type of bits
-   */
-  void setRunningBit(bool b) {
-    if (b)
-      mydata |= static_cast<uword>(1);
-    else
-      mydata &= static_cast<uword>(~1);
-  }
-
-  void discardFirstWords(uword x) {
-    const uword rl(getRunningLength());
-    if (rl >= x) {
-      setRunningLength(rl - x);
-      return;
-    }
-    x -= rl;
-    setRunningLength(0);
-    setNumberOfLiteralWords(getNumberOfLiteralWords() - x);
-  }
-
-  /**
-   * running length of which type of bits
-   */
-  static inline void setRunningBit(uword &data, bool b) {
-    if (b)
-      data |= static_cast<uword>(1);
-    else
-      data &= static_cast<uword>(~1);
-  }
-
-  void setRunningLength(uword l) {
-    mydata |= shiftedlargestrunninglengthcount;
-    mydata &=
-        static_cast<uword>((l << 1) | notshiftedlargestrunninglengthcount);
-  }
-
-  // static call for people who hate objects
-  static inline void setRunningLength(uword &data, uword l) {
-    data |= shiftedlargestrunninglengthcount;
-    data &= static_cast<uword>((l << 1) | notshiftedlargestrunninglengthcount);
-  }
-
-  void setNumberOfLiteralWords(uword l) {
-    mydata |= notrunninglengthplusrunningbit;
-    mydata &= static_cast<uword>((l << (runninglengthbits + 1)) |
-                                 runninglengthplusrunningbit);
-  }
-  // static call for people who hate objects
-  static inline void setNumberOfLiteralWords(uword &data, uword l) {
-    data |= notrunninglengthplusrunningbit;
-    data &= static_cast<uword>(l << (runninglengthbits + 1)) |
-            runninglengthplusrunningbit;
-  }
-
-  static const uint32_t runninglengthbits = sizeof(uword) * 4;
-  static const uint32_t literalbits = sizeof(uword) * 8 - 1 - runninglengthbits;
-  static const uword largestliteralcount =
-      (static_cast<uword>(1) << literalbits) - 1;
-  static const uword largestrunninglengthcount =
-      (static_cast<uword>(1) << runninglengthbits) - 1;
-  static const uword shiftedlargestrunninglengthcount =
-      largestrunninglengthcount << 1;
-  static const uword notshiftedlargestrunninglengthcount =
-      static_cast<uword>(~shiftedlargestrunninglengthcount);
-  static const uword runninglengthplusrunningbit =
-      (static_cast<uword>(1) << (runninglengthbits + 1)) - 1;
-  static const uword notrunninglengthplusrunningbit =
-      static_cast<uword>(~runninglengthplusrunningbit);
-  static const uword notlargestrunninglengthcount =
-      static_cast<uword>(~largestrunninglengthcount);
-
-  uword &mydata;
-};
-
-/**
- * Same as RunningLengthWord, except that the values cannot be modified.
- */
-template <class uword = uint32_t> class ConstRunningLengthWord {
-public:
-  ConstRunningLengthWord() : mydata(0) {}
-
-  ConstRunningLengthWord(const uword data) : mydata(data) {}
-
-  ConstRunningLengthWord(const ConstRunningLengthWord &rlw)
-      : mydata(rlw.mydata) {}
-
-  /**
-   * Which bit is being repeated?
-   */
-  bool getRunningBit() const { return mydata & static_cast<uword>(1); }
-
-  /**
-   * how many words should be filled by the running bit
-   */
-  uword getRunningLength() const {
-    return static_cast<uword>(
-        (mydata >> 1) & RunningLengthWord<uword>::largestrunninglengthcount);
-  }
-
-  /**
-   * followed by how many literal words?
-   */
-  uword getNumberOfLiteralWords() const {
-    return static_cast<uword>(
-        mydata >> (1 + RunningLengthWord<uword>::runninglengthbits));
-  }
-
-  /**
-   * Total of getRunningLength() and getNumberOfLiteralWords()
-   */
-  uword size() const { return getRunningLength() + getNumberOfLiteralWords(); }
-
-  uword mydata;
-};
-
-template <class uword> class EWAHBoolArray;
-
-template <class uword> class EWAHBoolArrayRawIterator;
-
-/**
- * Same as RunningLengthWord, except that the values are buffered for quick
- * access.
- */
-template <class uword = uint32_t> class BufferedRunningLengthWord {
-public:
-  enum { wordinbits = sizeof(uword) * 8 };
-
-  BufferedRunningLengthWord(const uword &data,
-                            EWAHBoolArrayRawIterator<uword> *p)
-      : RunningBit(data & static_cast<uword>(1)),
-        RunningLength(static_cast<uword>(
-            (data >> 1) & RunningLengthWord<uword>::largestrunninglengthcount)),
-        NumberOfLiteralWords(static_cast<uword>(
-            data >> (1 + RunningLengthWord<uword>::runninglengthbits))),
-        parent(p) {}
-  BufferedRunningLengthWord(const RunningLengthWord<uword> &p)
-      : RunningBit(p.mydata & static_cast<uword>(1)),
-        RunningLength((p.mydata >> 1) &
-                      RunningLengthWord<uword>::largestrunninglengthcount),
-        NumberOfLiteralWords(p.mydata >>
-                             (1 + RunningLengthWord<uword>::runninglengthbits)),
-        parent(p.parent) {}
-
-  void discharge(EWAHBoolArray<uword> &container) {
-    while (size() > 0) {
-      // first run
-      size_t pl = getRunningLength();
-      container.fastaddStreamOfEmptyWords(getRunningBit(), pl);
-      size_t pd = getNumberOfLiteralWords();
-      writeLiteralWords(pd, container);
-      if (!next())
-        break;
-    }
-  }
-
-  size_t dischargeCount() {
-    size_t answer = 0;
-    while (size() > 0) {
-      // first run
-      if (getRunningBit()) {
-        answer += wordinbits * getRunningLength();
-      }
-      size_t pd = getNumberOfLiteralWords();
-      for (size_t i = 0; i < pd; ++i)
-        answer += countOnes((uword)getLiteralWordAt(i));
-      if (!next())
-        break;
-    }
-    return answer;
-  }
-
-  size_t dischargeCountNegated() {
-    size_t answer = 0;
-    while (size() > 0) {
-      // first run
-      if (!getRunningBit()) {
-        answer += wordinbits * getRunningLength();
-      }
-      size_t pd = getNumberOfLiteralWords();
-      for (size_t i = 0; i < pd; ++i)
-        answer += countOnes((uword)(~getLiteralWordAt(i)));
-      if (!next())
-        break;
-    }
-    return answer;
-  }
-
-  // Symbolically write out up to max words, returns how many were written,
-  // write to count the number bits written (we assume that count was initially
-  // zero)
-  size_t dischargeCount(size_t max, size_t *count) {
-    size_t index = 0;
-    while (true) {
-      if (index + RunningLength > max) {
-        const size_t offset = max - index;
-        if (getRunningBit())
-          *count += offset * wordinbits;
-        RunningLength -= offset;
-        return max;
-      }
-      if (getRunningBit())
-        *count += RunningLength * wordinbits;
-      index += RunningLength;
-      if (NumberOfLiteralWords + index > max) {
-        const size_t offset = max - index;
-        for (size_t i = 0; i < offset; ++i)
-          *count += countOnes((uword)getLiteralWordAt(i));
-        RunningLength = 0;
-        NumberOfLiteralWords -= offset;
-        return max;
-      }
-      for (size_t i = 0; i < NumberOfLiteralWords; ++i)
-        *count += countOnes((uword)getLiteralWordAt(i));
-      index += NumberOfLiteralWords;
-      if (!next())
-        break;
-    }
-    return index;
-  }
-
-  size_t dischargeCountNegated(size_t max, size_t *count) {
-    size_t index = 0;
-    while (true) {
-      if (index + RunningLength > max) {
-        const size_t offset = max - index;
-        if (!getRunningBit())
-          *count += offset * wordinbits;
-        RunningLength -= offset;
-        return max;
-      }
-      if (!getRunningBit())
-        *count += RunningLength * wordinbits;
-      index += RunningLength;
-      if (NumberOfLiteralWords + index > max) {
-        const size_t offset = max - index;
-        for (size_t i = 0; i < offset; ++i)
-          *count += countOnes((uword)(~getLiteralWordAt(i)));
-        RunningLength = 0;
-        NumberOfLiteralWords -= offset;
-        return max;
-      }
-      for (size_t i = 0; i < NumberOfLiteralWords; ++i)
-        *count += countOnes((uword)(~getLiteralWordAt(i)));
-      index += NumberOfLiteralWords;
-      if (!next())
-        break;
-    }
-    return index;
-  }
-  bool nonzero_discharge() {
-    while (size() > 0) {
-      // first run
-      size_t pl = getRunningLength();
-      if ((pl > 0) && (getRunningBit()))
-        return true;
-      size_t pd = getNumberOfLiteralWords();
-      if (pd > 0)
-        return true;
-      discardFirstWordsWithReload(static_cast<uword>(pl + pd));
-    }
-    return false;
-  }
-
-  // Write out up to max words, returns how many were written
-  size_t discharge(EWAHBoolArray<uword> &container, size_t max) {
-    size_t index = 0;
-    while (true) {
-      if (index + RunningLength > max) {
-        const size_t offset = max - index;
-        container.fastaddStreamOfEmptyWords(getRunningBit(), offset);
-        RunningLength = static_cast<uword>(RunningLength - offset);
-        return max;
-      }
-      container.fastaddStreamOfEmptyWords(getRunningBit(), RunningLength);
-      index += RunningLength;
-      if (NumberOfLiteralWords + index > max) {
-        const size_t offset = max - index;
-        writeLiteralWords(offset, container);
-        RunningLength = 0;
-        NumberOfLiteralWords = static_cast<uword>(NumberOfLiteralWords - offset);
-        return max;
-      }
-      writeLiteralWords(NumberOfLiteralWords, container);
-      index += NumberOfLiteralWords;
-      if (!next())
-        break;
-    }
-    return index;
-  }
-
-  bool nonzero_discharge(size_t max, size_t &index) {
-    index = 0;
-    while ((index < max) && (size() > 0)) {
-      // first run
-      size_t pl = getRunningLength();
-      if (index + pl > max) {
-        pl = max - index;
-      }
-      if ((getRunningBit()) && (pl > 0))
-        return true;
-      index += pl;
-      size_t pd = getNumberOfLiteralWords();
-      if (pd + index > max) {
-        pd = max - index;
-      }
-      if (pd > 0)
-        return true;
-      discardFirstWordsWithReload(static_cast<uword>(pl + pd));
-    }
-    return false;
-  }
-
-  // Write out up to max words, returns how many were written
-  size_t dischargeNegated(EWAHBoolArray<uword> &container, size_t max) {
-    // todo: could be optimized further
-    size_t index = 0;
-    while ((index < max) && (size() > 0)) {
-      // first run
-      size_t pl = getRunningLength();
-      if (index + pl > max) {
-        pl = max - index;
-      }
-      container.fastaddStreamOfEmptyWords(!getRunningBit(), pl);
-      index += pl;
-      size_t pd = getNumberOfLiteralWords();
-      if (pd + index > max) {
-        pd = max - index;
-      }
-      writeNegatedLiteralWords(pd, container);
-      discardFirstWordsWithReload(static_cast<uword>(pl + pd));
-      index += pd;
-    }
-    return index;
-  }
-  bool nonzero_dischargeNegated(size_t max, size_t &index) {
-    while ((index < max) && (size() > 0)) {
-      // first run
-      size_t pl = getRunningLength();
-      if (index + pl > max) {
-        pl = max - index;
-      }
-      if ((!getRunningBit()) && (pl > 0))
-        return true;
-      index += pl;
-      size_t pd = getNumberOfLiteralWords();
-      if (pd + index > max) {
-        pd = max - index;
-      }
-      if (pd > 0)
-        return true;
-      discardFirstWordsWithReload(static_cast<uword>(pl + pd));
-      index += pd;
-    }
-    return false;
-  }
-
-  uword getLiteralWordAt(size_t index) { return parent->dirtyWords()[index]; }
-
-  void writeLiteralWords(size_t numWords, EWAHBoolArray<uword> &container) {
-    container.fastaddStreamOfDirtyWords(parent->dirtyWords(), numWords);
-  }
-
-  void writeNegatedLiteralWords(size_t numWords,
-                                EWAHBoolArray<uword> &container) {
-    container.addStreamOfNegatedDirtyWords(parent->dirtyWords(), numWords);
-  }
-
-  void discardRunningWords() { RunningLength = 0; }
-
-  void discardRunningWordsWithReload() {
-    RunningLength = 0;
-    if (NumberOfLiteralWords == 0)
-      next();
-  }
-
-  bool next() {
-    if (!parent->hasNext()) {
-      NumberOfLiteralWords = 0;
-      RunningLength = 0;
-      return false;
-    }
-    parent->next();
-    return true;
-  }
-
-  void read(const uword &data) {
-    RunningBit = data & static_cast<uword>(1);
-    RunningLength = static_cast<uword>(
-        (data >> 1) & RunningLengthWord<uword>::largestrunninglengthcount);
-    NumberOfLiteralWords = static_cast<uword>(
-        data >> (1 + RunningLengthWord<uword>::runninglengthbits));
-  }
-
-  /**
-   * Which bit is being repeated?
-   */
-  bool getRunningBit() const { return RunningBit; }
-
-  void discardFirstWords(uword x) {
-    if (RunningLength >= x) {
-      RunningLength = static_cast<uword>(RunningLength - x);
-      return;
-    }
-    x = static_cast<uword>(x - RunningLength);
-    RunningLength = 0;
-    NumberOfLiteralWords = static_cast<uword>(NumberOfLiteralWords - x);
-  }
-
-  /**
-   * how many words should be filled by the running bit (see previous method)
-   */
-  uword getRunningLength() const { return RunningLength; }
-
-  /**
-   * followed by how many literal words?
-   */
-  uword getNumberOfLiteralWords() const { return NumberOfLiteralWords; }
-
-  /**
-   * Total of getRunningLength() and getNumberOfLiteralWords()
-   */
-  uword size() const {
-    return static_cast<uword>(RunningLength + NumberOfLiteralWords);
-  }
-
-  friend std::ostream &operator<<(std::ostream &out,
-                                  const BufferedRunningLengthWord &a) {
-    out << "{RunningBit:" << a.RunningBit
-        << ",RunningLength:" << a.RunningLength
-        << ",NumberOfLiteralWords:" << a.NumberOfLiteralWords << "}";
-    return out;
-  }
-  void discardLiteralWordsWithReload(uword x) {
-    assert(NumberOfLiteralWords >= x);
-    NumberOfLiteralWords -= x;
-    if (NumberOfLiteralWords == 0)
-      next();
-  }
-
-  void discardFirstWordsWithReload(uword x) {
-    while (x > 0) {
-      if (RunningLength > x) {
-        RunningLength = static_cast<uword>(RunningLength - x);
-        return;
-      }
-      x = static_cast<uword>(x - RunningLength);
-      RunningLength = 0;
-      size_t toDiscard = x > NumberOfLiteralWords ? NumberOfLiteralWords : x;
-      NumberOfLiteralWords =
-          static_cast<uword>(NumberOfLiteralWords - toDiscard);
-      x = static_cast<uword>(x - toDiscard);
-      if ((x > 0) || (size() == 0)) {
-        if (!next())
-          break;
-      }
-    }
-  }
-
-private:
-  bool RunningBit;
-  uword RunningLength;
-  uword NumberOfLiteralWords;
-  EWAHBoolArrayRawIterator<uword> *parent;
-};
-} // namespace ewah
-
-#endif /* RUNNINGLENGTHWORD_H_ */

From a3133bd25833760d89aa6362ed4406fa6b70259d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Robert?= <cr52@protonmail.com>
Date: Tue, 25 Apr 2023 16:49:27 +0200
Subject: [PATCH 2/2] fix silent conflict

---
 setupext.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setupext.py b/setupext.py
index c4085976818..006bb582af2 100644
--- a/setupext.py
+++ b/setupext.py
@@ -206,7 +206,7 @@ def check_CPP14_flags(possible_compile_flags):
 
 def get_ewah_bool_utils_path():
     if sys.version_info >= (3, 9):
-        return os.path.abspath(importlib.resources.files("ewah_bool_utils"))
+        return os.path.abspath(importlib_resources.files("ewah_bool_utils"))
     else:
         from pkg_resources import resource_filename
         return os.path.dirname(os.path.abspath(resource_filename("ewah_bool_utils", "ewah_bool_wrap.pxd")))