Make classpath hashing more lightweight
And make it parallel!

This patch adds a cache that relies on filesystem metadata (last modified time and size) to reuse jar hashes across compiler iterations. This matters because, until now, running `compile` on multi-module builds with gigantic classpaths carried a significant overhead: the previous algorithm recomputed hashes for all jars transitively across all of these projects.
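
Conceptually, the cache maps each jar to the metadata observed when its hash was last computed, and reuses the hash while that metadata is unchanged. A minimal sketch of the idea (hypothetical names; the actual implementation is in `MixedAnalyzingCompiler` in the diff below):

```scala
import java.io.File
import java.util.concurrent.ConcurrentHashMap

object JarHashCache {
  // (lastModified, size) is a cheap proxy for "the jar has not changed".
  final case class JarMetadata(lastModified: Long, size: Long)
  private val cache = new ConcurrentHashMap[File, (JarMetadata, Int)]()

  // Reuse the cached hash while the jar's metadata is unchanged; otherwise run
  // the expensive `computeHash` (which reads the whole jar) and store the result.
  def cachedHash(file: File, computeHash: File => Int): Int = {
    val current = JarMetadata(file.lastModified(), file.length())
    Option(cache.get(file)) match {
      case Some((metadata, hash)) if metadata == current => hash
      case _ =>
        val fresh = computeHash(file)
        cache.put(file, (current, fresh))
        fresh
    }
  }
}
```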

This patch is conservative; several things are wrong with the status quo of classpath hashing. The most important one is that Zinc has been calling `hashCode` on a SHA-1 checksum, which doesn't make sense. The second is that we don't need a SHA-1 checksum for the kind of checks we want to do; #371 explains why. The third limitation is that file hashes are represented internally as `int`s, which is not enough to capture the full checksum. My previous PR also tackles this problem; it will be solved in the long term.
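
To illustrate the first point (a hypothetical example, not Zinc code): a SHA-1 digest is 160 bits, and calling `hashCode` on it collapses those bits into a single 32-bit `Int`, discarding most of the checksum.

```scala
import java.security.MessageDigest

object Sha1Truncation {
  // SHA-1 produces a 20-byte (160-bit) digest.
  def sha1(bytes: Array[Byte]): Array[Byte] =
    MessageDigest.getInstance("SHA-1").digest(bytes)

  def main(args: Array[String]): Unit = {
    val digest = sha1("contents-of-some-jar".getBytes("UTF-8"))
    // java.util.Arrays.hashCode folds the 20 bytes into one Int, so two
    // different digests can easily collapse to the same value.
    val truncated = java.util.Arrays.hashCode(digest)
    println(s"160-bit digest reduced to a 32-bit value: $truncated")
  }
}
```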

Therefore, this pull request only tackles these two things:

* Caching of classpath entry hashes.
* Parallelize this IO-bound task (a rough sketch follows this list).
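
Since hashing one classpath entry is independent of the others, the work can be spread over a parallel collection. A rough sketch of the idea (not the actual Zinc code, which lives in `MixedAnalyzingCompiler` below; assumes Scala 2.12, where parallel collections ship with the standard library, and a hypothetical `hashEntry` function):

```scala
import java.io.File

object ParallelHashing {
  // Hypothetical stand-in for the real per-entry hashing, which reads the whole jar.
  def hashEntry(file: File): Int =
    if (file.exists()) (file.getName.hashCode * 31) ^ file.length().hashCode else 0

  // `.par` fans the IO-bound work out to the default fork/join pool;
  // results come back in classpath order.
  def hashClasspath(classpath: Seq[File]): Array[Int] =
    classpath.par.map(hashEntry).toArray
}
```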

Results, on my local machine:

- Sequential hashing of the first 500 jars in my ivy cache: 1330ms.
- Parallel hashing of the first 500 jars in my ivy cache: 770ms.
- Second parallel hashing of the same 500 jars (metadata cache hit): 1ms.

Fixes #433.
jvican committed Nov 9, 2017
1 parent 8050289 commit 1297263
Showing 2 changed files with 91 additions and 3 deletions.
35 changes: 32 additions & 3 deletions zinc/src/main/scala/sbt/internal/inc/MixedAnalyzingCompiler.scala
@@ -11,7 +11,10 @@ package inc
 
 import java.io.File
 import java.lang.ref.{ Reference, SoftReference }
+import java.nio.file.Files
+import java.nio.file.attribute.{ BasicFileAttributes, FileTime }
 import java.util.Optional
+import java.util.concurrent.ConcurrentHashMap
 
 import inc.javac.AnalyzingJavaCompiler
 import xsbti.{ Reporter, AnalysisCallback => XAnalysisCallback }
@@ -162,6 +165,17 @@ final class MixedAnalyzingCompiler(
  * of cross Java-Scala compilation.
  */
 object MixedAnalyzingCompiler {
+  private type JarMetadata = (FileTime, Long)
+  // Cache of jar hashes, keyed by the jar file; the stored metadata guards against stale entries
+  private[this] val cacheMetadataJar = new ConcurrentHashMap[File, (JarMetadata, FileHash)]()
+  private[this] final val emptyStampCode = EmptyStamp.hashCode()
+  private def emptyFileHash(file: File) = FileHash.of(file, emptyStampCode)
+  private def genFileHash(file: File, metadata: JarMetadata): FileHash = {
+    val newHash = FileHash.of(file, Stamper.forHash(file).hashCode())
+    cacheMetadataJar.put(file, (metadata, newHash))
+    newHash
+  }
+
   def makeConfig(
       scalac: xsbti.compile.ScalaCompiler,
       javac: xsbti.compile.JavaCompiler,
@@ -181,13 +195,28 @@
       incrementalCompilerOptions: IncOptions,
       extra: List[(String, String)]
   ): CompileConfiguration = {
-    val classpathHash = classpath map { x =>
-      FileHash.of(x, Stamper.forHash(x).hashCode)
+    // #433: Cache jars with their metadata to avoid recomputing hashes transitively in other projects
+    val parallelClasspathHashing = classpath.toParArray.map { file =>
+      if (!file.exists()) emptyFileHash(file)
+      else {
+        // `readAttributes` needs to be guarded by `file.exists()`, otherwise it fails
+        val attrs = Files.readAttributes(file.toPath, classOf[BasicFileAttributes])
+        if (attrs.isDirectory) emptyFileHash(file)
+        else {
+          val currentMetadata = (attrs.lastModifiedTime(), attrs.size())
+          Option(cacheMetadataJar.get(file)) match {
+            case Some((metadata, hashHit)) if metadata == currentMetadata => hashHit
+            case _ => genFileHash(file, currentMetadata) // recompute on a miss or stale metadata
+          }
+        }
+      }
     }
+
+    val classpathHash = parallelClasspathHashing.toArray
     val compileSetup = MiniSetup.of(
       output,
       MiniOptions.of(
-        classpathHash.toArray,
+        classpathHash,
         options.toArray,
         javacOptions.toArray
       ),
59 changes: 59 additions & 0 deletions zinc/src/test/scala/sbt/inc/cached/CachedHashingSpec.scala
@@ -0,0 +1,59 @@
package sbt.inc.cached

import java.nio.file.Paths

import sbt.inc.{ BaseCompilerSpec, SourceFiles }
import sbt.internal.inc.{ Analysis, CompileOutput, MixedAnalyzingCompiler }
import sbt.io.IO

class CachedHashingSpec extends BaseCompilerSpec {
  def timeMs[R](block: => R): Long = {
    val t0 = System.nanoTime()
    block // call-by-name
    val t1 = System.nanoTime()
    (t1 - t0) / 1000000
  }

  "zinc" should "cache jar generation" in {
    IO.withTemporaryDirectory { tempDir =>
      val classes = Seq(SourceFiles.Good)
      val sources0 = Map(Paths.get("src") -> classes.map(path => Paths.get(path)))
      val projectSetup = ProjectSetup(tempDir.toPath(), sources0, Nil)
      val compiler = projectSetup.createCompiler()

      import compiler.in.{ setup, options, compilers, previousResult }
      import sbt.internal.inc.JavaInterfaceUtil._
      import sbt.io.syntax.{ file, fileToRichFile, singleFileFinder }

      val javac = compilers.javaTools.javac
      val scalac = compilers.scalac
      val giganticClasspath =
        file(sys.props("user.home"))./(".ivy2").**("*.jar").get.take(500)

      def genConfig = MixedAnalyzingCompiler.makeConfig(
        scalac,
        javac,
        options.sources,
        giganticClasspath,
        CompileOutput(options.classesDirectory),
        setup.cache,
        setup.progress.toOption,
        options.scalacOptions,
        options.javacOptions,
        Analysis.empty,
        previousResult.setup.toOption,
        setup.perClasspathEntryLookup,
        setup.reporter,
        options.order,
        setup.skip,
        setup.incrementalCompilerOptions,
        setup.extra.toList.map(_.toScalaTuple)
      )

      val hashingTime = timeMs(genConfig)
      val cachedHashingTime = timeMs(genConfig)
      assert(cachedHashingTime < (hashingTime * 0.05),
             s"Jar hash caching did not work: $cachedHashingTime ms is not below 5% of $hashingTime ms.")
    }
  }
}
