Reimplementations of immutable HashSet and HashMap.
The reimplementations are based upon Compressed Hash-Array Mapped Prefix-trees (CHAMP); see the paper "Optimizing Hash-Array Mapped Tries for Fast and Lean Immutable JVM Collections" by Steindorfer and Vinju (OOPSLA '15) for details and for descriptions of the low-level performance optimizations (a pre-print of the paper is available at https://michael.steindorfer.name/publications/oopsla15.pdf). This commit closes #192.
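
For readers unfamiliar with CHAMP, the following is a minimal sketch of the node layout the paper describes: each node carries two 32-bit bitmaps, with payload elements compacted at the front of a single array and sub-nodes at the back. The names (BitmapNode, dataMap, nodeMap) and the code itself are illustrative only and are not the API introduced by this commit.

// Minimal CHAMP lookup sketch (illustrative, not this commit's code).
// Each 5-bit hash prefix maps to one bit position: set in dataMap means
// the slot holds a payload, set in nodeMap means it holds a sub-node.
final class BitmapNode[A](dataMap: Int, nodeMap: Int, content: Array[Any]) {
  def contains(elem: A, hash: Int, shift: Int): Boolean = {
    val bit = 1 << ((hash >>> shift) & 0x1f)
    if ((dataMap & bit) != 0) {
      // compressed index: count the payload bits below `bit`
      val idx = Integer.bitCount(dataMap & (bit - 1))
      content(idx) == elem
    } else if ((nodeMap & bit) != 0) {
      // sub-nodes are stored right-to-left at the end of the array
      val idx = content.length - 1 - Integer.bitCount(nodeMap & (bit - 1))
      content(idx).asInstanceOf[BitmapNode[A]].contains(elem, hash, shift + 5)
    } else false
  }
}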

The new implementations (i.e., ChampHashSet and ChampHashMap) currently exist alongside the previous HashMap and HashSet. By default, immutable.Map and immutable.Set now pick up the CHAMP data structures. A JVM flag (-Dstrawman.collection.immutable.useBaseline=true) allows switching back to the previous HashSet and HashMap implementations for testing. Note that the flag and the previous HashSet and HashMap implementations will be removed in the final version of collection-strawman; for the time being they remain in place to support comparing the trade-offs and performance characteristics of the current and the new data structures.
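
The switch is an ordinary JVM system property. As a usage sketch (the property name comes from this commit; the demo object and the dispatch shown are illustrative, not the library's actual mechanism):

// Illustrative demo: reading the baseline switch as a JVM system property.
object UseBaselineDemo {
  def main(args: Array[String]): Unit = {
    val useBaseline =
      java.lang.Boolean.getBoolean("strawman.collection.immutable.useBaseline")
    println(if (useBaseline) "baseline HashSet/HashMap" else "CHAMP data structures")
  }
}
// Run with: java -Dstrawman.collection.immutable.useBaseline=true UseBaselineDemo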

Preliminary performance numbers for the new CHAMP data structures were presented in issue #192. Overall, the CHAMP data structures significantly lower memory footprints and significantly improve all iteration-based operations and equality checks, while basic operations such as lookup, insertion, and deletion may slow down. The current state of the reimplementation does not yet optimize for hash collisions.

Note that the CHAMP design/implementation differs from the previous immutable hashed data structures in that it does not memoize the hash codes of the individual elements (which may change the performance of certain workloads). If necessary, CHAMP's design allows memoized hash codes of the individual elements to be added modularly, at the expense of some of the memory savings. Details are discussed in the paper mentioned above.
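
A sketch of what such modular memoization could look like, assuming it amounts to caching each element's hash code next to the element; the Memoized wrapper is hypothetical and not part of this commit:

// Hypothetical wrapper: trades one extra Int per element for never
// re-computing elem.## during lookup, iteration, or equality checks.
final class Memoized[A](val elem: A, val cachedHash: Int) {
  override def hashCode: Int = cachedHash
  override def equals(that: Any): Boolean = that match {
    case m: Memoized[_] => cachedHash == m.cachedHash && elem == m.elem
    case _              => false
  }
}
object Memoized {
  def apply[A](elem: A): Memoized[A] = new Memoized(elem, elem.##)
}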
msteindorfer committed Feb 3, 2018
1 parent 2eb7f23 commit 566b143
Showing 15 changed files with 2,429 additions and 20 deletions.
@@ -0,0 +1,167 @@
package strawman.collection.immutable

import java.util.concurrent.TimeUnit

import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

@BenchmarkMode(scala.Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(1)
@Warmup(iterations = 8)
@Measurement(iterations = 8)
@State(Scope.Benchmark)
class ChampHashSetBenchmark {
  @Param(scala.Array("0", "1", "2", "3", "4", "7", "8", "15", "16", "17", "39", "282", "4096", "131070", "7312102"))
  var size: Int = _

  var xs: ChampHashSet[Long] = _
  var ys: ChampHashSet[Long] = _
  var zs: ChampHashSet[Long] = _
  var zipped: ChampHashSet[(Long, Long)] = _
  var randomIndices: scala.Array[Int] = _
  def fresh(n: Int) = ChampHashSet((1 to n).map(_.toLong): _*)

  @Setup(Level.Trial)
  def initTrial(): Unit = {
    xs = fresh(size)
    ys = fresh(size)
    zs = fresh((size / 1000) max 2).map(-_)
    zipped = xs.map(x => (x, x))
    if (size > 0) {
      randomIndices = scala.Array.fill(1000)(scala.util.Random.nextInt(size))
    }
  }

  @Benchmark
  def create(bh: Blackhole): Unit = bh.consume(fresh(size))

  @Benchmark
  @OperationsPerInvocation(1000)
  def expand_incl(bh: Blackhole): Unit = {
    var ys = xs
    var i = 0L
    while (i < 1000) {
      ys += -i
      i += 1
    }
    bh.consume(ys)
  }

  @Benchmark
  def expand_concat(bh: Blackhole): Unit = bh.consume(xs ++ zs)

  @Benchmark
  def traverse_foreach(bh: Blackhole): Unit = xs.foreach(x => bh.consume(x))

  @Benchmark
  def traverse_headTail(bh: Blackhole): Unit = {
    var ys = xs
    while (ys.nonEmpty) {
      bh.consume(ys.head)
      ys = ys.tail
    }
  }

  @Benchmark
  def traverse_initLast(bh: Blackhole): Unit = {
    var ys = xs
    while (ys.nonEmpty) {
      bh.consume(ys.last)
      ys = ys.init
    }
  }

  @Benchmark
  def traverse_iterator(bh: Blackhole): Unit = {
    val it = xs.iterator()
    while (it.hasNext) {
      bh.consume(it.next())
    }
  }

  @Benchmark
  def traverse_foldLeft(bh: Blackhole): Unit = bh.consume(xs.foldLeft(0) {
    case (acc, n) =>
      bh.consume(n)
      acc + 1
  })

  @Benchmark
  def traverse_foldRight(bh: Blackhole): Unit = bh.consume(xs.foldRight(0) {
    case (n, acc) =>
      bh.consume(n)
      acc - 1
  })

  @Benchmark
  def access_tail(bh: Blackhole): Unit = bh.consume(xs.tail)

  @Benchmark
  def access_init(bh: Blackhole): Unit = bh.consume(xs.init)

  @Benchmark
  @OperationsPerInvocation(100)
  def access_slice(bh: Blackhole): Unit = {
    var i = 0
    while (i < 100) {
      bh.consume(xs.slice(size - size / (i + 1), size))
      i += 1
    }
  }

  @Benchmark
  @OperationsPerInvocation(1000)
  def access_contains(bh: Blackhole): Unit = {
    var i = 0
    while (i < 1000) {
      bh.consume(xs.contains(i))
      i += 1
    }
  }

  @Benchmark
  def transform_map(bh: Blackhole): Unit = bh.consume(xs.map(x => x + 1))

  @Benchmark
  @OperationsPerInvocation(100)
  def transform_span(bh: Blackhole): Unit = {
    var i = 0
    while (i < 100) {
      val (xs1, xs2) = xs.span(x => x < randomIndices(i))
      bh.consume(xs1)
      bh.consume(xs2)
      i += 1
    }
  }

  @Benchmark
  def transform_zip(bh: Blackhole): Unit = bh.consume(xs.zip(xs))

  @Benchmark
  def transform_zipMapTupled(bh: Blackhole): Unit = {
    val f = (a: Long, b: Long) => (a, b)
    bh.consume(xs.zip(xs).map(f.tupled))
  }

  @Benchmark
  def transform_zipWithIndex(bh: Blackhole): Unit = bh.consume(xs.zipWithIndex)

  @Benchmark
  def transform_lazyZip(bh: Blackhole): Unit = bh.consume(xs.lazyZip(xs).map((_, _)))

  @Benchmark
  def transform_unzip(bh: Blackhole): Unit = bh.consume(zipped.unzip)

  @Benchmark
  def transform_groupBy(bh: Blackhole): Unit = {
    val result = xs.groupBy(_ % 5)
    bh.consume(result)
  }

  @Benchmark
  def traverse_subsetOf(bh: Blackhole): Unit = bh.consume(ys.subsetOf(xs))

  @Benchmark
  def traverse_equals(bh: Blackhole): Unit = bh.consume(xs == ys)

}
@@ -19,6 +19,7 @@ class HashSetBenchmark {
  var size: Int = _

  var xs: HashSet[Long] = _
  var ys: HashSet[Long] = _
  var zs: HashSet[Long] = _
  var zipped: HashSet[(Long, Long)] = _
  var randomIndices: scala.Array[Int] = _
@@ -27,6 +28,7 @@ class HashSetBenchmark {
  @Setup(Level.Trial)
  def initTrial(): Unit = {
    xs = fresh(size)
    ys = fresh(size)
    zs = fresh((size / 1000) max 2).map(-_)
    zipped = xs.map(x => (x, x))
    if (size > 0) {
@@ -64,14 +66,15 @@ class HashSetBenchmark {
    }
  }

  @Benchmark
  def traverse_initLast(bh: Blackhole): Unit = {
    var ys = xs
    while (ys.nonEmpty) {
      bh.consume(ys.last)
      ys = ys.init
    }
  }
  // // TODO: currently disabled, since it does not finish
  // @Benchmark
  // def traverse_initLast(bh: Blackhole): Unit = {
  // var ys = xs
  // while (ys.nonEmpty) {
  // bh.consume(ys.last)
  // ys = ys.init
  // }
  // }

  @Benchmark
  def traverse_iterator(bh: Blackhole): Unit = {
@@ -158,4 +161,11 @@ class HashSetBenchmark {
    val result = xs.groupBy(_ % 5)
    bh.consume(result)
  }

  @Benchmark
  def traverse_subsetOf(bh: Blackhole): Unit = bh.consume(ys.subsetOf(xs))

  @Benchmark
  def traverse_equals(bh: Blackhole): Unit = bh.consume(xs == ys)

}
@@ -19,6 +19,7 @@ class ScalaHashSetBenchmark {
  var size: Int = _

  var xs: scala.collection.immutable.HashSet[Long] = _
  var ys: scala.collection.immutable.HashSet[Long] = _
  var zs: scala.collection.immutable.HashSet[Long] = _
  var zipped: scala.collection.immutable.HashSet[(Long, Long)] = _
  var randomIndices: scala.Array[Int] = _
@@ -27,6 +28,7 @@ class ScalaHashSetBenchmark {
  @Setup(Level.Trial)
  def initTrial(): Unit = {
    xs = fresh(size)
    ys = fresh(size)
    zs = fresh((size / 1000) max 2).map(-_)
    zipped = xs.map(x => (x, x))
    if (size > 0) {
@@ -64,14 +66,15 @@ class ScalaHashSetBenchmark {
    }
  }

  @Benchmark
  def traverse_initLast(bh: Blackhole): Unit = {
    var ys = xs
    while (ys.nonEmpty) {
      bh.consume(ys.last)
      ys = ys.init
    }
  }
  // // TODO: currently disabled, since it does not finish
  // @Benchmark
  // def traverse_initLast(bh: Blackhole): Unit = {
  // var ys = xs
  // while (ys.nonEmpty) {
  // bh.consume(ys.last)
  // ys = ys.init
  // }
  // }

  @Benchmark
  def traverse_iterator(bh: Blackhole): Unit = {
@@ -158,4 +161,11 @@ class ScalaHashSetBenchmark {
    val result = xs.groupBy(_ % 5)
    bh.consume(result)
  }

  @Benchmark
  def traverse_subsetOf(bh: Blackhole): Unit = bh.consume(ys.subsetOf(xs))

  @Benchmark
  def traverse_equals(bh: Blackhole): Unit = bh.consume(xs == ys)

}
1 change: 1 addition & 0 deletions build.sbt
@@ -39,6 +39,7 @@ val commonSettings = Seq(
      <developer><id>odersky</id><name>Martin Odersky</name></developer>
      <developer><id>julienrf</id><name>Julien Richard-Foy</name></developer>
      <developer><id>szeiger</id><name>Stefan Zeiger</name></developer>
      <developer><id>msteindorfer</id><name>Michael J. Steindorfer</name></developer>
    </developers>,
  // For publishing snapshots
  credentials ++= (
