From 07aa2c9417668335703a18646a9ad18dacb618df Mon Sep 17 00:00:00 2001 From: jvican Date: Thu, 9 Nov 2017 16:42:55 +0100 Subject: [PATCH] Make classpath hashing more lightweight And make it parallel! This patch adds a cache that relies on filesystem metadata to cache hashes for jars that have the same last modified time across different compiler iterations. This is important because until now there was a significant overhead when running `compile` on multi-module builds that have gigantic classpaths. In this scenario, the previous algorithm computed hashes for all jars transitively across all these projects. This patch is conservative; there are several things that are wrong with the status quo of classpath hashing. The most important one is the fact that Zinc has been doing `hashCode` on a SHA-1 checksum, which doesn't make sense. The second one is that we don't need a SHA-1 checksum for the kind of checks we want to do. https://github.com/sbt/zinc/pull/371 explains why. The third limitation is that file hashes are implemented internally as `int`s, which is not enough to represent the full checksum. My previous PR also tackles this problem, which will be solved in the long term. Therefore, this pull request only tackles these two things: * Cache classpath entry hashes. * Parallelize this IO-bound task. Results, on my local machine: - No parallel hashing of the first 500 jars in my ivy cache: 1330ms. - Parallel hashing of the first 500 jars in my ivy cache: 770ms. - Second parallel hashing of the first 500 jars in my ivy cache: 1ms. Fixes #433. 
--- .../internal/inc/MixedAnalyzingCompiler.scala | 35 ++++++++++- .../sbt/inc/cached/CachedHashingSpec.scala | 58 +++++++++++++++++++ 2 files changed, 90 insertions(+), 3 deletions(-) create mode 100644 zinc/src/test/scala/sbt/inc/cached/CachedHashingSpec.scala diff --git a/zinc/src/main/scala/sbt/internal/inc/MixedAnalyzingCompiler.scala b/zinc/src/main/scala/sbt/internal/inc/MixedAnalyzingCompiler.scala index df0952f9c5..344b9d2fae 100644 --- a/zinc/src/main/scala/sbt/internal/inc/MixedAnalyzingCompiler.scala +++ b/zinc/src/main/scala/sbt/internal/inc/MixedAnalyzingCompiler.scala @@ -11,7 +11,10 @@ package inc import java.io.File import java.lang.ref.{ Reference, SoftReference } +import java.nio.file.Files +import java.nio.file.attribute.{ BasicFileAttributes, FileTime } import java.util.Optional +import java.util.concurrent.ConcurrentHashMap import inc.javac.AnalyzingJavaCompiler import xsbti.{ Reporter, AnalysisCallback => XAnalysisCallback } @@ -162,6 +165,17 @@ final class MixedAnalyzingCompiler( * of cross Java-Scala compilation. 
*/ object MixedAnalyzingCompiler { + // For more safety, store both the time and size + private type JarMetadata = (FileTime, Long) + private[this] val cacheMetadataJar = new ConcurrentHashMap[File, (JarMetadata, FileHash)]() + private[this] final val emptyStampCode = EmptyStamp.hashCode() + private def emptyFileHash(file: File) = FileHash.of(file, emptyStampCode) + private def genFileHash(file: File, metadata: JarMetadata): FileHash = { + val newHash = FileHash.of(file, Stamper.forHash(file).hashCode()) + cacheMetadataJar.put(file, (metadata, newHash)) + newHash + } + def makeConfig( scalac: xsbti.compile.ScalaCompiler, javac: xsbti.compile.JavaCompiler, @@ -181,13 +195,28 @@ object MixedAnalyzingCompiler { incrementalCompilerOptions: IncOptions, extra: List[(String, String)] ): CompileConfiguration = { - val classpathHash = classpath map { x => - FileHash.of(x, Stamper.forHash(x).hashCode) + // #433: Cache jars with their metadata to avoid recomputing hashes transitively in other projects + val parallelClasspathHashing = classpath.toParArray.map { file => + if (!file.exists()) emptyFileHash(file) + else { + // `readAttributes` needs to be guarded by `file.exists()`, otherwise it fails + val attrs = Files.readAttributes(file.toPath, classOf[BasicFileAttributes]) + if (attrs.isDirectory) emptyFileHash(file) + else { + val currentMetadata = (attrs.lastModifiedTime(), attrs.size()) + Option(cacheMetadataJar.get(file)) match { + case Some((metadata, hashHit)) if metadata == currentMetadata => hashHit + case None => genFileHash(file, currentMetadata) + } + } + } } + + val classpathHash = parallelClasspathHashing.toArray val compileSetup = MiniSetup.of( output, MiniOptions.of( - classpathHash.toArray, + classpathHash, options.toArray, javacOptions.toArray ), diff --git a/zinc/src/test/scala/sbt/inc/cached/CachedHashingSpec.scala b/zinc/src/test/scala/sbt/inc/cached/CachedHashingSpec.scala new file mode 100644 index 0000000000..16db222b8a --- /dev/null +++ 
b/zinc/src/test/scala/sbt/inc/cached/CachedHashingSpec.scala @@ -0,0 +1,58 @@ +package sbt.inc.cached + +import java.nio.file.Paths + +import sbt.inc.{ BaseCompilerSpec, SourceFiles } +import sbt.internal.inc.{ Analysis, CompileOutput, MixedAnalyzingCompiler } +import sbt.io.IO + +class CachedHashingSpec extends BaseCompilerSpec { + def timeMs[R](block: => R): Long = { + val t0 = System.nanoTime() + block // call-by-name + val t1 = System.nanoTime() + (t1 - t0) / 1000000 + } + + "zinc" should "cache jar generation" in { + IO.withTemporaryDirectory { tempDir => + val classes = Seq(SourceFiles.Good) + val sources0 = Map(Paths.get("src") -> classes.map(path => Paths.get(path))) + val projectSetup = ProjectSetup(tempDir.toPath(), sources0, Nil) + val compiler = projectSetup.createCompiler() + + import compiler.in.{ setup, options, compilers, previousResult } + import sbt.internal.inc.JavaInterfaceUtil._ + import sbt.io.syntax.{ file, fileToRichFile, singleFileFinder } + + val javac = compilers.javaTools.javac + val scalac = compilers.scalac + val giganticClasspath = file(sys.props("user.home"))./(".ivy2").**("*.jar").get.take(500) + + def genConfig = MixedAnalyzingCompiler.makeConfig( + scalac, + javac, + options.sources, + giganticClasspath, + CompileOutput(options.classesDirectory), + setup.cache, + setup.progress.toOption, + options.scalacOptions, + options.javacOptions, + Analysis.empty, + previousResult.setup.toOption, + setup.perClasspathEntryLookup, + setup.reporter, + options.order, + setup.skip, + setup.incrementalCompilerOptions, + setup.extra.toList.map(_.toScalaTuple) + ) + + val hashingTime = timeMs(genConfig) + val cachedHashingTime = timeMs(genConfig) + assert(cachedHashingTime < (hashingTime * 0.20), + s"Cache jar didn't work: $cachedHashingTime is >= than 20% of $hashingTime.") + } + } +}