-
Notifications
You must be signed in to change notification settings - Fork 121
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make classpath hashing more lightweight
And make it parallel! This patch adds a cache that relies on filesystem metadata to cache hashes for jars that have the same last modified time across different compiler iterations. This is important because until now there was a significant overhead when running `compile` on multi-module builds that have gigantic classpaths. In this scenario, the previous algorithm computed hashes for all jars transitively across all these projects. This patch is conservative; there are several things that are wrong with the status quo of classpath hashing. The most important one is the fact that Zinc has been doing `hashCode` on a SHA-1 checksum, which doesn't make sense. The second one is that we don't need a SHA-1 checksum for the kind of checks we want to do. #371 explains why. The third limitation with this check is that file hashes are implemented internally as `int`s, which is not enough to represent the richness of the checksum. My previous PR also tackles this problem, which will be solved in the long term. Therefore, this pull request only tackles these two things: * Caching of classpath entry hashes. * Parallelize this IO-bound task. Results, on my local machine: - No parallel hashing of the first 500 jars in my ivy cache: 1330ms. - Parallel hashing of the first 500 jars in my ivy cache: 770ms. - Second parallel hashing of the first 500 jars in my ivy cache: 1ms. Fixes #433.
- Loading branch information
Showing
3 changed files
with
105 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
43 changes: 43 additions & 0 deletions
43
zinc/src/main/scala/sbt/internal/inc/caching/ClasspathCache.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
package sbt.internal.inc.caching | ||
|
||
import java.io.File | ||
import java.nio.file.Files | ||
import java.util.concurrent.ConcurrentHashMap | ||
import java.nio.file.attribute.{ BasicFileAttributes, FileTime } | ||
|
||
import xsbti.compile.FileHash | ||
import sbt.internal.inc.{ EmptyStamp, Stamper } | ||
|
||
/**
 * Cache of classpath entry hashes keyed by file.
 *
 * Every cached hash is stored together with the file metadata (last modified
 * time and size) observed when the hash was computed. A cached hash is reused
 * only while the file's current metadata equals the stored metadata; otherwise
 * the hash is recomputed and the cache entry is overwritten.
 */
object ClasspathCache {
  // For more safety, store both the time and size
  private type JarMetadata = (FileTime, Long)

  // A concurrent map is used because `hashClasspath` fills it from a parallel collection.
  private[this] val cacheMetadataJar = new ConcurrentHashMap[File, (JarMetadata, FileHash)]()
  private[this] final val emptyStampCode = EmptyStamp.hashCode()

  /** Hash used for entries that are not hashed: missing files and directories. */
  private def emptyFileHash(file: File): FileHash = FileHash.of(file, emptyStampCode)

  /** Computes the hash of `file` and records it in the cache alongside `metadata`. */
  private def genFileHash(file: File, metadata: JarMetadata): FileHash = {
    val newHash = FileHash.of(file, Stamper.forHash(file).hashCode())
    cacheMetadataJar.put(file, (metadata, newHash))
    newHash
  }

  /**
   * Hashes every entry of `classpath`, reusing cached hashes where possible.
   *
   * Entries are processed through a parallel array; the result has one
   * [[xsbti.compile.FileHash]] per classpath entry.
   *
   * @param classpath the classpath entries to hash
   * @return the hash of each classpath entry
   */
  def hashClasspath(classpath: Seq[File]): Array[FileHash] = {
    // #433: Cache jars with their metadata to avoid recomputing hashes transitively in other projects
    def fromCacheOrHash(file: File): FileHash = {
      if (!file.exists()) emptyFileHash(file)
      else {
        // `readAttributes` needs to be guarded by `file.exists()`, otherwise it fails
        val attrs = Files.readAttributes(file.toPath, classOf[BasicFileAttributes])
        if (attrs.isDirectory) emptyFileHash(file)
        else {
          val currentMetadata = (attrs.lastModifiedTime(), attrs.size())
          Option(cacheMetadataJar.get(file)) match {
            case Some((metadata, hashHit)) if metadata == currentMetadata => hashHit
            // Covers both a cache miss and a stale entry (the file changed since it was
            // cached). Matching only `None` here would throw a MatchError for stale
            // entries instead of re-hashing them.
            case _ => genFileHash(file, currentMetadata)
          }
        }
      }
    }

    classpath.toParArray.map(fromCacheOrHash).toArray
  }
}
58 changes: 58 additions & 0 deletions
58
zinc/src/test/scala/sbt/inc/cached/CachedHashingSpec.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package sbt.inc.cached | ||
|
||
import java.nio.file.Paths | ||
|
||
import sbt.inc.{ BaseCompilerSpec, SourceFiles } | ||
import sbt.internal.inc.{ Analysis, CompileOutput, MixedAnalyzingCompiler } | ||
import sbt.io.IO | ||
|
||
/**
 * Timing-based spec: builds the same compiler configuration twice over a large
 * classpath and asserts that the second build takes less than 20% of the time
 * of the first one.
 */
class CachedHashingSpec extends BaseCompilerSpec {

  /** Evaluates `block` once and returns the elapsed time in whole milliseconds. */
  def timeMs[R](block: => R): Long = {
    val startNanos = System.nanoTime()
    block // by-name argument: evaluated here, its result is discarded
    val elapsedNanos = System.nanoTime() - startNanos
    elapsedNanos / 1000000
  }

  "zinc" should "cache jar generation" in {
    IO.withTemporaryDirectory { baseDir =>
      val goodSources = Seq(SourceFiles.Good)
      val sourcesByDir = Map(Paths.get("src") -> goodSources.map(Paths.get(_)))
      val project = ProjectSetup(baseDir.toPath(), sourcesByDir, Nil)
      val compiler = project.createCompiler()

      import compiler.in.{ setup, options, compilers, previousResult }
      import sbt.internal.inc.JavaInterfaceUtil._
      import sbt.io.syntax.{ file, fileToRichFile, singleFileFinder }

      val javaCompiler = compilers.javaTools.javac
      val scalaCompiler = compilers.scalac

      // Up to 500 jars found under the current user's `.ivy2` directory.
      val ivyHome = file(sys.props("user.home")) / ".ivy2"
      val giganticClasspath = (ivyHome ** "*.jar").get.take(500)

      // Must stay a `def`: each reference builds the configuration again, which is
      // what gets timed below.
      def buildConfig =
        MixedAnalyzingCompiler.makeConfig(
          scalaCompiler,
          javaCompiler,
          options.sources,
          giganticClasspath,
          CompileOutput(options.classesDirectory),
          setup.cache,
          setup.progress.toOption,
          options.scalacOptions,
          options.javacOptions,
          Analysis.empty,
          previousResult.setup.toOption,
          setup.perClasspathEntryLookup,
          setup.reporter,
          options.order,
          setup.skip,
          setup.incrementalCompilerOptions,
          setup.extra.toList.map(_.toScalaTuple)
        )

      // The first run is the baseline; the second is expected to be much faster.
      val hashingTime = timeMs(buildConfig)
      val cachedHashingTime = timeMs(buildConfig)
      assert(
        cachedHashingTime < (hashingTime * 0.20),
        s"Cache jar didn't work: $cachedHashingTime is >= than 20% of $hashingTime."
      )
    }
  }
}