Make classpath hashing more lightweight
And make it parallel!

This patch adds a cache that relies on filesystem metadata (last-modified time and size) to reuse hashes for jars that have not changed between compiler iterations. This is important because, until now, running `compile` on multi-module builds with gigantic classpaths incurred significant overhead: the previous algorithm recomputed hashes for all jars transitively across all these projects.

This patch is conservative; several things are wrong with the status quo of classpath hashing. The most important one is that Zinc calls `hashCode` on a SHA-1 checksum, which doesn't make sense. The second is that we don't need a SHA-1 checksum for the kind of checks we want to do; #371
explains why. The third limitation is that file hashes are represented internally as `Int`s, which is not enough to hold the full checksum. My previous PR also tackles this problem, which will be solved in the long term.
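
As a rough illustration of the first two points (this is not Zinc's actual `Stamper` code, and the jar path is hypothetical): collapsing a SHA-1 digest with `hashCode` keeps only 32 of its 160 bits, so the expensive cryptographic hash buys no more collision resistance than a cheap 32-bit hash would.

```scala
import java.nio.file.{ Files, Paths }
import java.security.MessageDigest

object Sha1Truncation {
  def main(args: Array[String]): Unit = {
    val bytes = Files.readAllBytes(Paths.get("some-library.jar")) // hypothetical jar
    // Full SHA-1 digest: 20 bytes, i.e. 160 bits of checksum.
    val sha1: Array[Byte] = MessageDigest.getInstance("SHA-1").digest(bytes)
    // Collapsing it into an Int keeps only 32 bits of that work.
    val truncated: Int = java.util.Arrays.hashCode(sha1)
    println(s"sha1 = ${sha1.map("%02x".format(_)).mkString}, as Int = $truncated")
  }
}
```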

Therefore, this pull request only tackles two things:

* Caching of classpath entry hashes.
* Parallelization of this IO-bound task (see the sketch below).
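
A minimal sketch of the parallelization, assuming Scala 2.12 parallel collections; `hashOne` is a hypothetical stand-in for the real per-jar hashing, which lives in `ClasspathCache` further down:

```scala
import java.io.File
import scala.collection.parallel._ // brings `toParArray` into scope on Scala 2.12

object ParallelHashingSketch {
  // Hypothetical placeholder for hashing one classpath entry.
  def hashOne(file: File): Int = file.getName.hashCode

  def hashAll(classpath: Seq[File]): Array[Int] =
    // Fan the IO-bound per-jar work out over the default fork-join pool.
    classpath.toParArray.map(hashOne).toArray
}
```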

Results, on my local machine:

- Hashing the first 500 jars in my ivy cache without parallelism: 1330ms.
- Parallel hashing of the first 500 jars in my ivy cache: 770ms.
- Second parallel hashing of the same 500 jars (warm cache): 1ms.

Fixes #433.
jvican committed Nov 10, 2017
1 parent 8050289 commit a90a0e9
Showing 3 changed files with 105 additions and 4 deletions.
@@ -11,6 +11,7 @@ package inc

import java.io.File
import java.lang.ref.{ Reference, SoftReference }
import java.nio.file.Files
import java.util.Optional

import inc.javac.AnalyzingJavaCompiler
@@ -20,6 +21,7 @@ import xsbti.compile._
import sbt.io.IO
import sbt.util.{ InterfaceUtil, Logger }
import sbt.internal.inc.JavaInterfaceUtil.EnrichOption
import sbt.internal.inc.caching.ClasspathCache
import xsbti.compile.ClassFileManager

/** An instance of an analyzing compiler that can run both javac + scalac. */
@@ -181,13 +183,11 @@ object MixedAnalyzingCompiler {
       incrementalCompilerOptions: IncOptions,
       extra: List[(String, String)]
   ): CompileConfiguration = {
-    val classpathHash = classpath map { x =>
-      FileHash.of(x, Stamper.forHash(x).hashCode)
-    }
+    val classpathHash = ClasspathCache.hashClasspath(classpath)
     val compileSetup = MiniSetup.of(
       output,
       MiniOptions.of(
-        classpathHash.toArray,
+        classpathHash,
         options.toArray,
         javacOptions.toArray
       ),
43 changes: 43 additions & 0 deletions zinc/src/main/scala/sbt/internal/inc/caching/ClasspathCache.scala
@@ -0,0 +1,43 @@
package sbt.internal.inc.caching

import java.io.File
import java.nio.file.Files
import java.util.concurrent.ConcurrentHashMap
import java.nio.file.attribute.{ BasicFileAttributes, FileTime }

import scala.collection.parallel._ // needed for `toParArray`

import xsbti.compile.FileHash
import sbt.internal.inc.{ EmptyStamp, Stamper }

object ClasspathCache {
  // For more safety, store both the time and size
  private type JarMetadata = (FileTime, Long)
  private[this] val cacheMetadataJar = new ConcurrentHashMap[File, (JarMetadata, FileHash)]()
  private[this] final val emptyStampCode = EmptyStamp.hashCode()
  private def emptyFileHash(file: File) = FileHash.of(file, emptyStampCode)
  private def genFileHash(file: File, metadata: JarMetadata): FileHash = {
    val newHash = FileHash.of(file, Stamper.forHash(file).hashCode())
    cacheMetadataJar.put(file, (metadata, newHash))
    newHash
  }

  def hashClasspath(classpath: Seq[File]): Array[FileHash] = {
    // #433: Cache jars with their metadata to avoid recomputing hashes transitively in other projects
    def fromCacheOrHash(file: File): FileHash = {
      if (!file.exists()) emptyFileHash(file)
      else {
        // `readAttributes` needs to be guarded by `file.exists()`, otherwise it fails
        val attrs = Files.readAttributes(file.toPath, classOf[BasicFileAttributes])
        if (attrs.isDirectory) emptyFileHash(file)
        else {
          val currentMetadata = (attrs.lastModifiedTime(), attrs.size())
          Option(cacheMetadataJar.get(file)) match {
            case Some((metadata, hashHit)) if metadata == currentMetadata => hashHit
            // Recompute (and re-cache) when there is no entry or the metadata is stale
            case _ => genFileHash(file, currentMetadata)
          }
        }
      }
    }

    classpath.toParArray.map(fromCacheOrHash).toArray
  }
}
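
A rough usage sketch of the cache above (the object name and command-line handling are made up for illustration): the first call hashes every jar in parallel; the second returns almost immediately because each entry's last-modified time and size still match the cached metadata.

```scala
import java.io.File
import sbt.internal.inc.caching.ClasspathCache

object ClasspathCacheDemo {
  def main(args: Array[String]): Unit = {
    // Pass any set of jar paths on the command line.
    val jars: Seq[File] = args.toSeq.map(new File(_))

    def timeMs[A](a: => A): Long = {
      val t0 = System.nanoTime(); a; (System.nanoTime() - t0) / 1000000
    }

    println(s"cold run: ${timeMs(ClasspathCache.hashClasspath(jars))} ms") // hashes all jars in parallel
    println(s"warm run: ${timeMs(ClasspathCache.hashClasspath(jars))} ms") // served from the metadata cache
  }
}
```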
58 changes: 58 additions & 0 deletions zinc/src/test/scala/sbt/inc/cached/CachedHashingSpec.scala
@@ -0,0 +1,58 @@
package sbt.inc.cached

import java.nio.file.Paths

import sbt.inc.{ BaseCompilerSpec, SourceFiles }
import sbt.internal.inc.{ Analysis, CompileOutput, MixedAnalyzingCompiler }
import sbt.io.IO

class CachedHashingSpec extends BaseCompilerSpec {
  def timeMs[R](block: => R): Long = {
    val t0 = System.nanoTime()
    block // call-by-name
    val t1 = System.nanoTime()
    (t1 - t0) / 1000000
  }

  "zinc" should "cache jar generation" in {
    IO.withTemporaryDirectory { tempDir =>
      val classes = Seq(SourceFiles.Good)
      val sources0 = Map(Paths.get("src") -> classes.map(path => Paths.get(path)))
      val projectSetup = ProjectSetup(tempDir.toPath(), sources0, Nil)
      val compiler = projectSetup.createCompiler()

      import compiler.in.{ setup, options, compilers, previousResult }
      import sbt.internal.inc.JavaInterfaceUtil._
      import sbt.io.syntax.{ file, fileToRichFile, singleFileFinder }

      val javac = compilers.javaTools.javac
      val scalac = compilers.scalac
      val giganticClasspath = file(sys.props("user.home"))./(".ivy2").**("*.jar").get.take(500)

      def genConfig = MixedAnalyzingCompiler.makeConfig(
        scalac,
        javac,
        options.sources,
        giganticClasspath,
        CompileOutput(options.classesDirectory),
        setup.cache,
        setup.progress.toOption,
        options.scalacOptions,
        options.javacOptions,
        Analysis.empty,
        previousResult.setup.toOption,
        setup.perClasspathEntryLookup,
        setup.reporter,
        options.order,
        setup.skip,
        setup.incrementalCompilerOptions,
        setup.extra.toList.map(_.toScalaTuple)
      )

      val hashingTime = timeMs(genConfig)
      val cachedHashingTime = timeMs(genConfig)
      assert(
        cachedHashingTime < (hashingTime * 0.20),
        s"Cache jar didn't work: $cachedHashingTime is >= 20% of $hashingTime."
      )
    }
  }
}
