Reimplementations of immutable HashSet and HashMap.
The reimplementations are based on Compressed Hash-Array Mapped Prefix-trees (CHAMP); see the paper "Optimizing Hash-Array Mapped Tries for Fast and Lean Immutable JVM Collections" by Steindorfer and Vinju (OOPSLA '15) for details and descriptions of the low-level performance optimizations (a pre-print is available at https://michael.steindorfer.name/publications/oopsla15.pdf). This commit closes #192.

The new implementations (i.e., ChampHashSet and ChampHashMap) currently exist next to the previous HashMap and HashSet. By default, immutable.Map and immutable.Set now pick up the CHAMP data structures. A JVM flag (-Dstrawman.collection.immutable.useBaseline=true) allows switching back to the previous HashSet and HashMap implementations for testing. Note that the flag and the previous implementations will be removed in the final version of collection-strawman; for the time being they remain so that the trade-offs and performance characteristics of the current and the new data structures can be compared.

Preliminary performance numbers for the new CHAMP data structures were presented in issue #192. Overall, the CHAMP data structures significantly lower memory footprints and significantly improve all iteration-based operations and equality checks, while basic operations such as lookup, insertion, and deletion may slow down. The current state of the reimplementation does not yet optimize for hash collisions.

Note that the CHAMP design and implementation differ from the previous immutable hashed data structures in that they do not memoize the hash codes of individual elements (which may change the performance of certain workloads). If necessary, CHAMP's design allows memoized hash codes to be added back modularly, at the expense of some of the memory savings. Details are discussed in the paper mentioned above.
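As a minimal sketch of how the two implementations coexist during the transition (only the names given above are taken from the commit; how immutable.Set dispatches on the JVM flag internally is not shown here and is an assumption):

import strawman.collection.immutable.{ChampHashSet, HashSet, Set}

object ChampCoexistenceSketch {
  def main(args: Array[String]): Unit = {
    // The new CHAMP-based implementation, requested explicitly:
    val champ = ChampHashSet(1, 2, 3)

    // The previous implementation, kept temporarily for comparison:
    val baseline = HashSet(1, 2, 3)

    // immutable.Set now picks the CHAMP implementation by default;
    // starting the JVM with -Dstrawman.collection.immutable.useBaseline=true
    // is documented above to switch it back to the previous HashSet.
    val default = Set(1, 2, 3)
    println(default.getClass.getName)
  }
}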
1 parent 2eb7f23 · commit 566b143 · 15 changed files with 2,429 additions and 20 deletions
benchmarks/time/src/main/scala/strawman/collection/immutable/ChampHashSetBenchmark.scala (167 additions, 0 deletions)
package strawman.collection.immutable

import java.util.concurrent.TimeUnit

import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

// JMH benchmark for the CHAMP-based immutable HashSet reimplementation.
@BenchmarkMode(scala.Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(1)
@Warmup(iterations = 8)
@Measurement(iterations = 8)
@State(Scope.Benchmark)
class ChampHashSetBenchmark {
  // Collection sizes under test, from empty up to several million elements.
  @Param(scala.Array("0", "1", "2", "3", "4", "7", "8", "15", "16", "17", "39", "282", "4096", "131070", "7312102"))
  var size: Int = _

  var xs: ChampHashSet[Long] = _
  var ys: ChampHashSet[Long] = _
  var zs: ChampHashSet[Long] = _
  var zipped: ChampHashSet[(Long, Long)] = _
  var randomIndices: scala.Array[Int] = _

  // Builds a fresh CHAMP set containing the longs 1..n.
  def fresh(n: Int) = ChampHashSet((1 to n).map(_.toLong): _*)

  @Setup(Level.Trial)
  def initTrial(): Unit = {
    xs = fresh(size)
    ys = fresh(size)
    // zs: a small set of negative elements, disjoint from xs, used by expand_concat.
    zs = fresh((size / 1000) max 2).map(-_)
    zipped = xs.map(x => (x, x))
    if (size > 0) {
      // Random indices are only needed by benchmarks that run on non-empty sets.
      randomIndices = scala.Array.fill(1000)(scala.util.Random.nextInt(size))
    }
  }

  @Benchmark
  def create(bh: Blackhole): Unit = bh.consume(fresh(size))

  @Benchmark
  @OperationsPerInvocation(1000)
  def expand_incl(bh: Blackhole): Unit = {
    var ys = xs
    var i = 0L
    while (i < 1000) {
      ys += -i
      i += 1
    }
    bh.consume(ys)
  }

  @Benchmark
  def expand_concat(bh: Blackhole): Unit = bh.consume(xs ++ zs)

  @Benchmark
  def traverse_foreach(bh: Blackhole): Unit = xs.foreach(x => bh.consume(x))

  @Benchmark
  def traverse_headTail(bh: Blackhole): Unit = {
    var ys = xs
    while (ys.nonEmpty) {
      bh.consume(ys.head)
      ys = ys.tail
    }
  }

  @Benchmark
  def traverse_initLast(bh: Blackhole): Unit = {
    var ys = xs
    while (ys.nonEmpty) {
      bh.consume(ys.last)
      ys = ys.init
    }
  }

  @Benchmark
  def traverse_iterator(bh: Blackhole): Unit = {
    val it = xs.iterator()
    while (it.hasNext) {
      bh.consume(it.next())
    }
  }

  @Benchmark
  def traverse_foldLeft(bh: Blackhole): Unit = bh.consume(xs.foldLeft(0) {
    case (acc, n) =>
      bh.consume(n)
      acc + 1
  })

  @Benchmark
  def traverse_foldRight(bh: Blackhole): Unit = bh.consume(xs.foldRight(0) {
    case (n, acc) =>
      bh.consume(n)
      acc - 1
  })

  @Benchmark
  def access_tail(bh: Blackhole): Unit = bh.consume(xs.tail)

  @Benchmark
  def access_init(bh: Blackhole): Unit = bh.consume(xs.init)

  @Benchmark
  @OperationsPerInvocation(100)
  def access_slice(bh: Blackhole): Unit = {
    var i = 0
    while (i < 100) {
      bh.consume(xs.slice(size - size / (i + 1), size))
      i += 1
    }
  }

  @Benchmark
  @OperationsPerInvocation(1000)
  def access_contains(bh: Blackhole): Unit = {
    var i = 0
    while (i < 1000) {
      bh.consume(xs.contains(i))
      i += 1
    }
  }

  @Benchmark
  def transform_map(bh: Blackhole): Unit = bh.consume(xs.map(x => x + 1))

  @Benchmark
  @OperationsPerInvocation(100)
  def transform_span(bh: Blackhole): Unit = {
    var i = 0
    while (i < 100) {
      val (xs1, xs2) = xs.span(x => x < randomIndices(i))
      bh.consume(xs1)
      bh.consume(xs2)
      i += 1
    }
  }

  @Benchmark
  def transform_zip(bh: Blackhole): Unit = bh.consume(xs.zip(xs))

  @Benchmark
  def transform_zipMapTupled(bh: Blackhole): Unit = {
    val f = (a: Long, b: Long) => (a, b)
    bh.consume(xs.zip(xs).map(f.tupled))
  }

  @Benchmark
  def transform_zipWithIndex(bh: Blackhole): Unit = bh.consume(xs.zipWithIndex)

  @Benchmark
  def transform_lazyZip(bh: Blackhole): Unit = bh.consume(xs.lazyZip(xs).map((_, _)))

  @Benchmark
  def transform_unzip(bh: Blackhole): Unit = bh.consume(zipped.unzip)

  @Benchmark
  def transform_groupBy(bh: Blackhole): Unit = {
    val result = xs.groupBy(_ % 5)
    bh.consume(result)
  }

  @Benchmark
  def traverse_subsetOf(bh: Blackhole): Unit = bh.consume(ys.subsetOf(xs))

  @Benchmark
  def traverse_equals(bh: Blackhole): Unit = bh.consume(xs == ys)
}
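The commit message also introduces a parallel ChampHashMap, for which no benchmark is shown in this file. A minimal usage sketch, assuming its companion and methods mirror strawman's existing immutable.Map API (updated, get) — an assumption, since the map's code is not part of this listing:

import strawman.collection.immutable.ChampHashMap

object ChampHashMapSketch {
  def main(args: Array[String]): Unit = {
    val m = ChampHashMap("a" -> 1, "b" -> 2)

    // updated returns a new map; the original is unchanged, and the two
    // are expected to share most of their internal CHAMP trie structure.
    val m2 = m.updated("c", 3)

    println(m.get("c"))  // None
    println(m2.get("c")) // Some(3)
  }
}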