diff --git a/benchmarks/time/src/main/scala/benchmarks/immutableHashSet/package.scala b/benchmarks/time/src/main/scala/benchmarks/immutableHashSet/package.scala new file mode 100644 index 0000000000..7bc5a51e6b --- /dev/null +++ b/benchmarks/time/src/main/scala/benchmarks/immutableHashSet/package.scala @@ -0,0 +1,127 @@ +package benchmarks.immutableHashSet + +import java.util.concurrent.TimeUnit + +import org.openjdk.jmh.annotations._ +import org.openjdk.jmh.infra.Blackhole +import strawman.collection.immutable + +@BenchmarkMode(scala.Array(Mode.AverageTime)) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork(1) +@Warmup(iterations = 6) +@Measurement(iterations = 6) +@State(Scope.Benchmark) +class HashSet { + @Param(scala.Array("0", "1", "3", "8", "17", "282", "4096", "131070", "7312102")) + var size: Int = _ + + var xs: immutable.HashSet[Long] = _ + var ys: immutable.HashSet[Long] = _ + def fresh(n: Int) = immutable.HashSet((1 to n).map(_.toLong): _*) + + @Setup(Level.Trial) + def initTrial(): Unit = { + xs = fresh(size) + ys = xs.take(xs.size / 2) + } + + @Benchmark + def subsetOf(bh: Blackhole) = bh.consume(ys.subsetOf(xs)) + + @Benchmark + def equals(bh: Blackhole) = bh.consume(xs == ys) + + @Benchmark + def incl(bh: Blackhole) = bh.consume(xs + 42) + + @Benchmark + def excl(bh: Blackhole) = bh.consume(xs - 42) + + @Benchmark + def union(bh: Blackhole) = bh.consume(xs union ys) + + @Benchmark + def intersection(bh: Blackhole) = bh.consume(xs intersect ys) + +} + +@BenchmarkMode(scala.Array(Mode.AverageTime)) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork(1) +@Warmup(iterations = 6) +@Measurement(iterations = 6) +@State(Scope.Benchmark) +class ChampHashSet { + @Param(scala.Array("0", "1", "3", "8", "17", "282", "4096", "131070", "7312102")) + var size: Int = _ + + var xs: immutable.ChampHashSet[Long] = _ + var ys: immutable.ChampHashSet[Long] = _ + def fresh(n: Int) = immutable.ChampHashSet((1 to n).map(_.toLong): _*) + + @Setup(Level.Trial) + def initTrial(): Unit = 
{ + xs = fresh(size) + ys = xs.take(xs.size / 2) + } + + @Benchmark + def subsetOf(bh: Blackhole) = bh.consume(ys.subsetOf(xs)) + + @Benchmark + def equals(bh: Blackhole) = bh.consume(xs == ys) + + @Benchmark + def incl(bh: Blackhole) = bh.consume(xs + 42) + + @Benchmark + def excl(bh: Blackhole) = bh.consume(xs - 42) + + @Benchmark + def union(bh: Blackhole) = bh.consume(xs union ys) + + @Benchmark + def intersection(bh: Blackhole) = bh.consume(xs intersect ys) + +} + +@BenchmarkMode(scala.Array(Mode.AverageTime)) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork(1) +@Warmup(iterations = 6) +@Measurement(iterations = 6) +@State(Scope.Benchmark) +class OldHashSet { + @Param(scala.Array("0", "1", "3", "8", "17", "282", "4096", "131070", "7312102")) + var size: Int = _ + + var xs: scala.collection.immutable.HashSet[Long] = _ + var ys: scala.collection.immutable.HashSet[Long] = _ + def fresh(n: Int) = scala.collection.immutable.HashSet((1 to n).map(_.toLong): _*) + + @Setup(Level.Trial) + def initTrial(): Unit = { + xs = fresh(size) + ys = xs.take(xs.size / 2) + } + + @Benchmark + def subsetOf(bh: Blackhole) = bh.consume(ys.subsetOf(xs)) + + @Benchmark + def equals(bh: Blackhole) = bh.consume(xs == ys) + + @Benchmark + def incl(bh: Blackhole) = bh.consume(xs + 42) + + @Benchmark + def excl(bh: Blackhole) = bh.consume(xs - 42) + + @Benchmark + def union(bh: Blackhole) = bh.consume(xs union ys) + + @Benchmark + def intersection(bh: Blackhole) = bh.consume(xs intersect ys) + +} diff --git a/collections/src/main/scala/strawman/collection/immutable/HashSet.scala b/collections/src/main/scala/strawman/collection/immutable/HashSet.scala index 832fb710c1..14dc30a0fd 100644 --- a/collections/src/main/scala/strawman/collection/immutable/HashSet.scala +++ b/collections/src/main/scala/strawman/collection/immutable/HashSet.scala @@ -5,9 +5,13 @@ package immutable import mutable.{Builder, ImmutableBuilder} import Hashing.computeHash -import scala.{Any, AnyRef, Array, Boolean, Int, 
NoSuchElementException, SerialVersionUID, Serializable, Unit, `inline`, sys, Some, None} -import scala.Predef.assert -import java.lang.Integer +import scala.{Any, AnyRef, Array, Boolean, Int, NoSuchElementException, None, SerialVersionUID, Serializable, Some, Unit, `inline`, sys} +import scala.Predef.{assert, intWrapper} +import java.lang.{Integer, System} + +import strawman.collection.generic.BitOperations + +import scala.annotation.tailrec /** This class implements immutable sets using a hash trie. * @@ -29,7 +33,7 @@ sealed abstract class HashSet[A] with StrictOptimizedIterableOps[A, HashSet, HashSet[A]] with Serializable { - import HashSet.nullToEmpty + import HashSet.{bufferSize, LeafHashSet, nullToEmpty} def iterableFactory = HashSet @@ -43,7 +47,47 @@ sealed abstract class HashSet[A] def excl(elem: A): HashSet[A] = nullToEmpty(removed0(elem, computeHash(elem), 0)) - override def empty: HashSet[A] = HashSet.empty + override def subsetOf(that: collection.Set[A]): Boolean = that match { + case that:HashSet[A] => + // call the specialized implementation with a level of 0 since both this and that are top-level hash sets + subsetOf0(that, 0) + case _ => + // call the generic implementation + super.subsetOf(that) + } + + override def concat(that: collection.Iterable[A]): HashSet[A] = that match { + case that: HashSet[A] => + val buffer = new Array[HashSet[A]](bufferSize(this.size + that.size)) + nullToEmpty(union0(that, 0, buffer, 0)) + case _ => super.concat(that) + } + + override def intersect(that: collection.Set[A]): HashSet[A] = that match { + case that: HashSet[A] => + val buffer = new Array[HashSet[A]](bufferSize(this.size min that.size)) + nullToEmpty(intersect0(that, 0, buffer, 0)) + case _ => super.intersect(that) + } + + override def diff(that: collection.Set[A]): HashSet[A] = that match { + case that: HashSet[A] => + val buffer = new Array[HashSet[A]](bufferSize(this.size)) + nullToEmpty(diff0(that, 0, buffer, 0)) + case _ => super.diff(that) + } + + 
override def filter(p: A => Boolean) = { + val buffer = new Array[HashSet[A]](bufferSize(size)) + nullToEmpty(filter0(p, false, 0, buffer, 0)) + } + + override def filterNot(p: A => Boolean) = { + val buffer = new Array[HashSet[A]](bufferSize(size)) + nullToEmpty(filter0(p, true, 0, buffer, 0)) + } + + def empty: HashSet[A] = HashSet.empty override def tail: HashSet[A] = this - head @@ -55,6 +99,60 @@ sealed abstract class HashSet[A] protected def removed0(key: A, hash: Int, level: Int): HashSet[A] + protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] + + /** + * A specialized implementation of subsetOf for when both this and that are HashSet[A] and we can take advantage + * of the tree structure of both operands and the precalculated hashcodes of the HashSet1 instances. + * @param that the other set + * @param level the level of this and that hashset + * The purpose of level is to keep track of how deep we are in the tree. + * We need this information for when we arrive at a leaf and have to call get0 on that + * The value of level is 0 for a top-level HashSet and grows in increments of 5 + * @return true if all elements of this set are contained in that set + */ + protected def subsetOf0(that: HashSet[A], level: Int): Boolean + + /** + * Union with a leaf HashSet at a given level. + * @param that a leaf HashSet + * @param level the depth in the tree. We need this when we have to create a branch node on top of this and that + * @return The union of this and that at the given level. Unless level is zero, the result is not a self-contained + * HashSet but needs to be stored at the correct depth + */ + private[immutable] def union0(that: LeafHashSet[A], level: Int): HashSet[A] + + /** + * Union with a HashSet at a given level + * @param that a HashSet + * @param level the depth in the tree. 
We need to keep track of the level to know how deep we are in the tree + * @param buffer a temporary buffer that is used for temporarily storing elements when creating new branch nodes + * @param offset0 the first offset into the buffer in which we are allowed to write + * @return The union of this and that at the given level. Unless level is zero, the result is not a self-contained + * HashSet but needs to be stored at the correct depth + */ + protected def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] + + /** + * Intersection with another hash set at a given level + * @param level the depth in the tree. We need to keep track of the level to know how deep we are in the tree + * @param buffer a temporary buffer that is used for temporarily storing elements when creating new branch nodes + * @param offset0 the first offset into the buffer in which we are allowed to write + * @return The intersection of this and that at the given level. Unless level is zero, the result is not a + * self-contained HashSet but needs to be stored at the correct depth + */ + protected def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] + + /** + * Diff with another hash set at a given level + * @param level the depth in the tree. We need to keep track of the level to know how deep we are in the tree + * @param buffer a temporary buffer that is used for temporarily storing elements when creating new branch nodes + * @param offset0 the first offset into the buffer in which we are allowed to write + * @return The diff of this and that at the given level. 
Unless level is zero, the result is not a + * self-contained HashSet but needs to be stored at the correct depth + */ + protected def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] + } /** @@ -95,10 +193,25 @@ object HashSet extends IterableFactory[HashSet] { protected def get0(elem: Any, hash: Int, level: Int) = false + protected def subsetOf0(that: HashSet[Any], level: Int): Boolean = { + // returns true because the empty set is a subset of all sets + true + } + protected def updated0(elem: Any, hash: Int, level: Int) = new HashSet1(elem, hash) protected def removed0(key: Any, hash: Int, level: Int) = this + private[immutable] def union0(that: LeafHashSet[Any], level: Int): HashSet[Any] = that + + protected def union0(that: HashSet[Any], level: Int, buffer: Array[HashSet[Any]], offset0: Int): HashSet[Any] = that + + protected def intersect0(that: HashSet[Any], level: Int, buffer: Array[HashSet[Any]], offset0: Int): HashSet[Any] = null + + protected def diff0(that: HashSet[Any], level: Int, buffer: Array[HashSet[Any]], offset0: Int): HashSet[Any] = null + + protected def filter0(p: Any => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[Any]], offset0: Int): HashSet[Any] = null + } /** @@ -143,6 +256,50 @@ object HashSet extends IterableFactory[HashSet] { protected def removed0(key: A, hash: Int, level: Int) = if (hash == this.hash && key == this.key) null else this + protected def subsetOf0(that: HashSet[A], level: Int): Boolean = { + // check if that contains this.key + // we use get0 with our key and hash at the correct level instead of calling contains, + // which would not work since that might not be a top-level HashSet + // and in any case would be inefficient because it would require recalculating the hash code + that.get0(key, hash, level) + } + + private[immutable] def union0(that: LeafHashSet[A], level: Int): HashSet[A] = that match { + case that if that.hash != this.hash => + // different hash code, 
so there is no need to investigate further. + // Just create a branch node containing the two. + makeHashTrieSet(this.hash, this, that.hash, that, level) + case that: HashSet1[A] => + if (this.key == that.key) { + this + } else { + // 32-bit hash collision (rare, but not impossible) + new HashSetCollision1[A](hash, ListSet.empty + this.key + that.key) + } + case that: HashSetCollision1[A] => + val ks1 = that.ks + key + // Could use eq check (faster) if ListSet was guaranteed to return itself + if (ks1.size == that.ks.size) { + that + } else { + new HashSetCollision1[A](hash, ks1) + } + } + + protected def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int) = { + // switch to the Leaf version of union + // we can exchange the arguments because union is symmetrical + that.union0(this, level) + } + + protected def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int) = + if (that.get0(key, hash, level)) this else null + + protected def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int) = + if (that.get0(key, hash, level)) null else this + + protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = + if (negate ^ p(key)) this else null } private[immutable] final class HashSetCollision1[A](private[HashSet] val hash: Int, val ks: ListSet[A]) extends LeafHashSet[A] { @@ -194,8 +351,119 @@ object HashSet extends IterableFactory[HashSet] { //hash = computeHash(kvs.) 
} - } + protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = { + val ks1 = if(negate) ks.filterNot(p) else ks.filter(p) + ks1.size match { + case 0 => + null + case 1 => + new HashSet1(ks1.head, hash) + case x if x == ks.size => + this + case _ => + new HashSetCollision1(hash, ks1) + } + } + protected def subsetOf0(that: HashSet[A], level: Int): Boolean = { + // we have to check each element + // we use get0 with our hash at the correct level instead of calling contains, + // which would not work since that might not be a top-level HashSet + // and in any case would be inefficient because it would require recalculating the hash code + ks.forall(key => that.get0(key, hash, level)) + } + + private[immutable] def union0(that: LeafHashSet[A], level: Int) = that match { + case that if that.hash != this.hash => + // different hash code, so there is no need to investigate further. + // Just create a branch node containing the two. + makeHashTrieSet(this.hash, this, that.hash, that, level) + case that: HashSet1[A] => + val ks1 = ks + that.key + // Could use eq check (faster) if ListSet was guaranteed to return itself + if (ks1.size == ks.size) { + this + } else { + // create a new HashSetCollision with the existing hash + // we don't have to check for size=1 because union is never going to remove elements + new HashSetCollision1[A](hash, ks1) + } + case that: HashSetCollision1[A] => + val ks1 = this.ks ++ that.ks + ks1.size match { + case size if size == this.ks.size => + // could this check be made faster by doing an eq check? + // I am not sure we can rely on ListSet returning itself when all elements are already in the set, + // so it seems unwise to rely on it. 
+ this + case size if size == that.ks.size => + // we have to check this as well, since we don't want to create a new instance if this is a subset of that + that + case _ => + // create a new HashSetCollision with the existing hash + // we don't have to check for size=1 because union is never going to remove elements + new HashSetCollision1[A](hash, ks1) + } + } + + protected def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int) = that match { + case that: LeafHashSet[A] => + // both operands are leaves, so switch to the simpler Leaf/Leaf implementation + this.union0(that, level) + case that: HashTrieSet[A] => + // switch to the simpler Tree/Leaf implementation + // we can swap this and that because union is symmetrical + that.union0(this, level) + case _ => this + } + + protected def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int) = { + // filter the keys, taking advantage of the fact that we know their hash code + val ks1 = ks.filter(that.get0(_, hash, level)) + ks1.size match { + case 0 => + // the empty set + null + case size if size == this.size => + // unchanged + // We do this check before the size-1 case since, even if the result is of size 1, + // it is preferable to return the existing set for better structural sharing + this + case size if size == that.size => + // the other set + // We do this check before the size-1 case since, even if the result is of size 1, + // it is preferable to return the existing set for better structural sharing + that + case 1 => + // create a new HashSet1 with the hash we already know + new HashSet1(ks1.head, hash) + case _ => + // create a new HashSetCollision with the hash we already know and the new keys + new HashSetCollision1(hash, ks1) + } + } + + protected def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int) = { + val ks1 = ks.filterNot(that.get0(_, hash, level)) + ks1.size match { + case 0 => + // the empty set + null + case size if size == this.size => + // unchanged + // We do 
this check first since even if the result is of size 1 since + // it is preferable to return the existing set for better structural sharing + this + case 1 => + // create a new HashSet1 with the hash we already know + new HashSet1(ks1.head, hash) + case _ => + // create a new HashSetCollision with the hash we already know and the new keys + new HashSetCollision1(hash, ks1) + } + } + + } /** * A branch node of the HashTrieSet with at least one and up to 32 children. @@ -323,6 +591,373 @@ object HashSet extends IterableFactory[HashSet] { this } } + + private[immutable] def union0(that: LeafHashSet[A], level: Int): HashSet[A] = { + val index = (that.hash >>> level) & 0x1f + val mask = (1 << index) + val offset = Integer.bitCount(bitmap & (mask - 1)) + if ((bitmap & mask) != 0) { + val sub = elems(offset) + val sub1 = sub.union0(that, level + 5) + if (sub eq sub1) this + else { + val elems1 = new Array[HashSet[A]](elems.length) + Array.copy(elems, 0, elems1, 0, elems.length) + elems1(offset) = sub1 + new HashTrieSet(bitmap, elems1, size + (sub1.size - sub.size)) + } + } else { + val elems1 = new Array[HashSet[A]](elems.length + 1) + Array.copy(elems, 0, elems1, 0, offset) + elems1(offset) = that + Array.copy(elems, offset, elems1, offset + 1, elems.length - offset) + val bitmap1 = bitmap | mask + new HashTrieSet(bitmap1, elems1, size + that.size) + } + } + + protected def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = that match { + case that if that eq this => + // shortcut for when that is this + // this happens often for nodes deeper in the tree, especially when that and this share a common "heritage" + // e.g. you have a large set A and do some small operations (adding and removing elements) to it to create B + // then A and B will have the vast majority of nodes in common, and this eq check will allow not even looking + // at these nodes. 
+ this + case that: LeafHashSet[A] => + // when that is a leaf, we can switch to the simpler Tree/Leaf implementation + this.union0(that, level) + case that: HashTrieSet[A] => + val a = this.elems + var abm = this.bitmap + var ai = 0 + + val b = that.elems + var bbm = that.bitmap + var bi = 0 + + // write results into the shared buffer starting at offset0 (this level adds at most 32 entries, one per bitmap bit) + var offset = offset0 + var rs = 0 + + // loop as long as there are bits left in either abm or bbm + while ((abm | bbm) != 0) { + // lowest remaining bit in abm + val alsb = abm ^ (abm & (abm - 1)) + // lowest remaining bit in bbm + val blsb = bbm ^ (bbm & (bbm - 1)) + if (alsb == blsb) { + val sub1 = a(ai).union0(b(bi), level + 5, buffer, offset) + rs += sub1.size + buffer(offset) = sub1 + offset += 1 + // clear lowest remaining one bit in abm and increase the a index + abm &= ~alsb + ai += 1 + // clear lowest remaining one bit in bbm and increase the b index + bbm &= ~blsb + bi += 1 + } else if (BitOperations.Int.unsignedCompare(alsb - 1, blsb - 1)) { + // alsb is smaller than blsb, or alsb is set and blsb is 0 + // in any case, alsb is guaranteed to be set here! + val sub1 = a(ai) + rs += sub1.size + buffer(offset) = sub1 + offset += 1 + // clear lowest remaining one bit in abm and increase the a index + abm &= ~alsb + ai += 1 + } else { + // blsb is smaller than alsb, or blsb is set and alsb is 0 + // in any case, blsb is guaranteed to be set here! 
+ val sub1 = b(bi) + rs += sub1.size + buffer(offset) = sub1 + offset += 1 + // clear lowest remaining one bit in bbm and increase the b index + bbm &= ~blsb + bi += 1 + } + } + if (rs == this.size) { + // if the result would be identical to this, we might as well return this + this + } else if (rs == that.size) { + // if the result would be identical to that, we might as well return that + that + } else { + // we don't have to check whether the result is a leaf, since union will only make the set larger + // and this is not a leaf to begin with. + val length = offset - offset0 + val elems = new Array[HashSet[A]](length) + System.arraycopy(buffer, offset0, elems, 0, length) + new HashTrieSet(this.bitmap | that.bitmap, elems, rs) + } + case _ => this + } + + protected def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = that match { + case that if that eq this => + // shortcut for when that is this + // this happens often for nodes deeper in the tree, especially when that and this share a common "heritage" + // e.g. you have a large set A and do some small operations (adding and removing elements) to it to create B + // then A and B will have the vast majority of nodes in common, and this eq check will allow not even looking + // at these nodes! 
+ this + case that: LeafHashSet[A] => + // when that is a leaf, we can switch to the simpler Tree/Leaf implementation + // it is OK to swap the arguments because intersect is symmetric + // (we can't do this in case of diff, which is not symmetric) + that.intersect0(this, level, buffer, offset0) + case that: HashTrieSet[A] => + val a = this.elems + var abm = this.bitmap + var ai = 0 + + val b = that.elems + var bbm = that.bitmap + var bi = 0 + + // if the bitmasks do not overlap, the result is definitely empty so we can abort here + if ((abm & bbm) == 0) + return null + + // write results into the shared buffer starting at offset0 (this level adds at most 32 entries, one per bitmap bit) + var offset = offset0 + var rs = 0 + var rbm = 0 + + // loop as long as there are bits left that are set in both abm and bbm + while ((abm & bbm) != 0) { + // lowest remaining bit in abm + val alsb = abm ^ (abm & (abm - 1)) + // lowest remaining bit in bbm + val blsb = bbm ^ (bbm & (bbm - 1)) + if (alsb == blsb) { + val sub1 = a(ai).intersect0(b(bi), level + 5, buffer, offset) + if (sub1 ne null) { + rs += sub1.size + rbm |= alsb + buffer(offset) = sub1 + offset += 1 + } + // clear lowest remaining one bit in abm and increase the a index + abm &= ~alsb + ai += 1 + // clear lowest remaining one bit in bbm and increase the b index + bbm &= ~blsb + bi += 1 + } else if (BitOperations.Int.unsignedCompare(alsb - 1, blsb - 1)) { + // alsb is smaller than blsb, or alsb is set and blsb is 0 + // in any case, alsb is guaranteed to be set here! + // clear lowest remaining one bit in abm and increase the a index + abm &= ~alsb + ai += 1 + } else { + // blsb is smaller than alsb, or blsb is set and alsb is 0 + // in any case, blsb is guaranteed to be set here! 
+ // clear lowest remaining one bit in bbm and increase the b index + bbm &= ~blsb + bi += 1 + } + } + + if (rbm == 0) { + // if the result bitmap is empty, the result is the empty set + null + } else if (rs == size0) { + // if the result has the same number of elements as this, it must be identical to this, + // so we might as well return this + this + } else if (rs == that.size0) { + // if the result has the same number of elements as that, it must be identical to that, + // so we might as well return that + that + } else { + val length = offset - offset0 + if (length == 1 && !buffer(offset0).isInstanceOf[HashTrieSet[A]]) + buffer(offset0) + else { + val elems = new Array[HashSet[A]](length) + System.arraycopy(buffer, offset0, elems, 0, length) + new HashTrieSet[A](rbm, elems, rs) + } + } + case _ => null + } + + protected def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = that match { + case that if that eq this => + // shortcut for when that is this + // this happens often for nodes deeper in the tree, especially when that and this share a common "heritage" + // e.g. you have a large set A and do some small operations (adding and removing elements) to it to create B + // then A and B will have the vast majority of nodes in common, and this eq check will allow not even looking + // at these nodes! 
+ null + case that: HashSet1[A] => + removed0(that.key, that.hash, level) + case that: HashTrieSet[A] => + val a = this.elems + var abm = this.bitmap + var ai = 0 + + val b = that.elems + var bbm = that.bitmap + var bi = 0 + + // write results into the shared buffer starting at offset0 (this level adds at most 32 entries, one per bitmap bit) + var offset = offset0 + var rs = 0 + var rbm = 0 + + // loop until there are no more bits in abm + while(abm!=0) { + // lowest remaining bit in abm + val alsb = abm ^ (abm & (abm - 1)) + // lowest remaining bit in bbm + val blsb = bbm ^ (bbm & (bbm - 1)) + if (alsb == blsb) { + val sub1 = a(ai).diff0(b(bi), level + 5, buffer, offset) + if (sub1 ne null) { + rs += sub1.size + rbm |= alsb + buffer(offset) = sub1 + offset += 1 + } + // clear lowest remaining one bit in abm and increase the a index + abm &= ~alsb; ai += 1 + // clear lowest remaining one bit in bbm and increase the b index + bbm &= ~blsb; bi += 1 + } else if (BitOperations.Int.unsignedCompare(alsb - 1, blsb - 1)) { + // alsb is smaller than blsb, or alsb is set and blsb is 0 + // in any case, alsb is guaranteed to be set here! + val sub1 = a(ai) + rs += sub1.size + rbm |= alsb + buffer(offset) = sub1; offset += 1 + // clear lowest remaining one bit in abm and increase the a index + abm &= ~alsb; ai += 1 + } else { + // blsb is smaller than alsb, or blsb is set and alsb is 0 + // in any case, blsb is guaranteed to be set here! 
+ // clear lowest remaining one bit in bbm and increase the b index + bbm &= ~blsb; bi += 1 + } + } + if (rbm == 0) { + null + } else if (rs == this.size0) { + // if the result has the same number of elements as this, it must be identical to this, + // so we might as well return this + this + } else { + val length = offset - offset0 + if (length == 1 && !buffer(offset0).isInstanceOf[HashTrieSet[A]]) + buffer(offset0) + else { + val elems = new Array[HashSet[A]](length) + System.arraycopy(buffer, offset0, elems, 0, length) + new HashTrieSet[A](rbm, elems, rs) + } + } + case that: HashSetCollision1[A] => + // we remove the elements using removed0 so we can use the fact that we know the hash of all elements + // to be removed + @tailrec def removeAll(s:HashSet[A], r:ListSet[A]) : HashSet[A] = + if(r.isEmpty || (s eq null)) s + else removeAll(s.removed0(r.head, that.hash, level), r.tail) + removeAll(this, that.ks) + case _ => this + } + + protected def subsetOf0(that: HashSet[A], level: Int): Boolean = if (that eq this) true else that match { + case that: HashTrieSet[A] if this.size0 <= that.size0 => + // create local mutable copies of members + var abm = this.bitmap + val a = this.elems + var ai = 0 + val b = that.elems + var bbm = that.bitmap + var bi = 0 + if ((abm & bbm) == abm) { + // I tried rewriting this using tail recursion, but the generated java byte code was less than optimal + while(abm!=0) { + // lowest remaining bit in abm + val alsb = abm ^ (abm & (abm - 1)) + // lowest remaining bit in bbm + val blsb = bbm ^ (bbm & (bbm - 1)) + // if both trees have a bit set at the same position, we need to check the subtrees + if (alsb == blsb) { + // we are doing a comparison of a child of this with a child of that, + // so we have to increase the level by 5 to keep track of how deep we are in the tree + if (!a(ai).subsetOf0(b(bi), level + 5)) + return false + // clear lowest remaining one bit in abm and increase the a index + abm &= ~alsb; ai += 1 + } + // clear 
lowermost remaining one bit in bbm and increase the b index + // we must do this in any case + bbm &= ~blsb; bi += 1 + } + true + } else { + // the bitmap of this has at least one bit set that is not set in the bitmap of that, + // so this can not possibly be a subset of that + false + } + case _ => + // if the other set is a HashTrieSet but has fewer elements than this, this can not be a subset of it + // if the other set is a HashSet1, we can not be a subset of it because we are a HashTrieSet with at least two children (see assertion) + // if the other set is a HashSetCollision1, we can not be a subset of it because we are a HashTrieSet with at least two different hash codes + // if the other set is the empty set, we are not a subset of it because we are not empty + false + } + + protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = { + // current write offset into the shared buffer + var offset = offset0 + // result size + var rs = 0 + // bitmap for kept elems + var kept = 0 + // loop over all elements + var i = 0 + while (i < elems.length) { + val result = elems(i).filter0(p, negate, level + 5, buffer, offset) + if (result ne null) { + buffer(offset) = result + offset += 1 + // add the result size + rs += result.size + // mark the bit i as kept + kept |= (1 << i) + } + i += 1 + } + if (offset == offset0) { + // empty + null + } else if (rs == size0) { + // unchanged + this + } else if (offset == offset0 + 1 && !buffer(offset0).isInstanceOf[HashTrieSet[A]]) { + // exactly one child survived and it is not a HashTrieSet, so return it directly instead of wrapping it + buffer(offset0) + } else { + // we have to return a HashTrieSet + val length = offset - offset0 + val elems1 = new Array[HashSet[A]](length) + System.arraycopy(buffer, offset0, elems1, 0, length) + val bitmap1 = if (length == elems.length) { + // we can reuse the original bitmap + bitmap + } else { + // calculate new bitmap by keeping just bits in the kept bitmask + Hashing.keepBits(bitmap, kept) + } + new HashTrieSet(bitmap1, elems1, rs) + } + } } // utility method to create a 
HashTrieSet from two leaf HashSets (HashSet1 or HashSetCollision1) with non-colliding hash code) @@ -349,6 +984,13 @@ object HashSet extends IterableFactory[HashSet] { } } + /** + * Calculates the maximum buffer size given the maximum possible total size of the trie-based collection + * @param size the maximum size of the collection to be generated + * @return the maximum buffer size + */ + @`inline` private def bufferSize(size: Int): Int = (size + 6) min (32 * 7) + /** * In many internal operations the empty set is represented as null for performance reasons. This method converts * null to the empty set for use in public methods