Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace slow SHA-1 by xxHash for classpath hashes #371

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,8 @@ lazy val zincCore = (project in internalPath / "zinc-core")
// compiler instances that are memory hungry
javaOptions in Test += "-Xmx1G",
name := "zinc Core",
compileOrder := sbt.CompileOrder.Mixed
compileOrder := sbt.CompileOrder.Mixed,
libraryDependencies += xxHashLibrary
)
.configure(addSbtIO, addSbtUtilLogging, addSbtUtilRelation)

Expand Down

This file was deleted.

10 changes: 0 additions & 10 deletions internal/compiler-interface/src/main/contraband/incremental.json
Original file line number Diff line number Diff line change
Expand Up @@ -305,16 +305,6 @@
}
]
},
{
"name": "FileHash",
"namespace": "xsbti.compile",
"target": "Java",
"type": "record",
"fields": [
{ "name": "file", "type": "java.io.File" },
{ "name": "hash", "type": "int" }
]
},
{
"name": "MiniOptions",
"namespace": "xsbti.compile",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package xsbti.compile;

import java.nio.ByteBuffer;

public final class FileHash implements java.io.Serializable {

/**
* @deprecated The use of this method is strongly discouraged. Please use {@link #hash64()}'s long variant.
*/
@Deprecated public static FileHash create(java.io.File _file, int _hash) {
return of(_file, _hash);
}

private static byte[] toByteArray(int _hash) {
ByteBuffer buffer = ByteBuffer.allocate(1);
buffer.putInt(_hash);
return buffer.array();
}

/**
* @deprecated The use of this method is strongly discouraged. Please use {@link #hash64()}'s long variant.
*/
@Deprecated public static FileHash of(java.io.File _file, int _hash) {
return new FileHash(_file, toByteArray(_hash));
}

public static FileHash create(java.io.File _file, byte[] _hash) {
return of(_file, _hash);
}

public static FileHash of(java.io.File _file, byte[] _hash) {
return new FileHash(_file, _hash);
}

private java.io.File file;
private byte[] hash;

protected FileHash(java.io.File _file, byte[] _hash) {
super();
file = _file;
hash = _hash;
}

public java.io.File file() {
return this.file;
}

/**
* @deprecated The use of this method is strongly discouraged. Please use {@link #hash64()}.
* @return A truncated 32-byte hash from a 64-byte hash.
*/
@Deprecated public int hash() {
return this.hash[0];
}

public byte[] hash64() {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needn't be named hash64 anymore.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cannot be called hash because it conflicts with the previous signature. I'll call it hashBytes.

return this.hash;
}

public FileHash withFile(java.io.File file) {
return new FileHash(file, hash);
}

/**
* @deprecated The use of this method is strongly discouraged. Please use {@link #hash64()}.
*/
@Deprecated public FileHash withHash(int hash) {
return new FileHash(file, toByteArray(hash));
}

public FileHash withHash(byte[] hash) {
return new FileHash(file, hash);
}

public boolean equals(Object obj) {
if (this == obj) {
return true;
} else if (!(obj instanceof FileHash)) {
return false;
} else {
FileHash o = (FileHash)obj;
return file().equals(o.file()) && (hash64() == o.hash64());
}
}

public int hashCode() {
return 37 * (37 * (37 * (17 + "xsbti.compile.FileHash".hashCode()) + file().hashCode()) + hash64().hashCode());
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Important: The hashCode and equals methods are not useful (need to use the static Arrays.equals(), iirc), which is one reason to prefer ByteBuffers for this.

The other reason to prefer ByteBuffers is that they can act as views into other data, which can avoid copying it: also, protobuf ByteStrings can be "viewed" as ByteBuffers with asReadOnlyByteBuffer.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 We're already using asReadOnlyByteBuffer in the protobuf converters, but I agree ByteBuffers are superior over Array[Byte].

}

public String toString() {
return "FileHash(" + "file: " + file() + ", " + "hash: " + hash64() + ")";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,17 @@ public interface Stamp {
/**
* Returns a unique identifier depending on the underlying data structures.
*
* @return A valid string-based representation for logical equality, not referential equality.
* @return A valid representation for logical equality, not referential equality.
*/
public int getValueId();
public byte[] getBytes();

/**
* Returns a unique identifier depending on the underlying data structures.
*
* @deprecated use {@link #getValueId()} instead.
* @return A valid int representation for logical equality, not referential equality.
*/
@Deprecated public int getValueId();

/**
* @return A string-based and recoverable representation of the underlying stamp.
Expand All @@ -35,14 +43,23 @@ public interface Stamp {
/**
* Get the hash of the file contents if the stamp supports it.
*
* @deprecated use {@link #getHash64()} instead.
*
* @return An optional hash of the file contents.
*/
public Optional<String> getHash();
@Deprecated public Optional<String> getHash();

/**
* Get the last modified time (in milliseconds from Epoch) of a file if the stamp supports it.
*
* @return An optional last modified time.
*/
public Optional<Long> getLastModified();

/**
* Get a 64-byte hash of a file if the stamp supports it.
*
* @return An optional 64-byte hash.
*/
public Optional<Long> getHash64();
}
93 changes: 59 additions & 34 deletions internal/zinc-core/src/main/scala/sbt/internal/inc/Stamp.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ package sbt
package internal
package inc

import java.io.{ File, IOException }
import java.io.{ File, IOException, RandomAccessFile }
import java.nio.ByteBuffer
import java.nio.channels.FileChannel.MapMode
import java.util
import java.util.Optional

import sbt.io.{ Hash => IOHash }
import net.jpountz.xxhash.XXHashFactory
import xsbti.compile.analysis.{ ReadStamps, Stamp }

import scala.collection.immutable.TreeMap
Expand Down Expand Up @@ -47,7 +49,6 @@ trait Stamps extends ReadStamps {

private[sbt] sealed abstract class StampBase extends Stamp {
override def toString: String = this.writeStamp()
override def hashCode(): Int = this.getValueId()
override def equals(other: Any): Boolean = other match {
case o: Stamp => Stamp.equivStamp.equiv(this, o)
case _ => false
Expand All @@ -56,63 +57,65 @@ private[sbt] sealed abstract class StampBase extends Stamp {

trait WithPattern { protected def Pattern: Regex }

import java.lang.{ Long => BoxedLong }
import java.lang.{ Long => BoxedLong, Integer => BoxedInt }

/** Define the hash of the file contents. It's a typical stamp for compilation sources. */
final class Hash private (val hexHash: String) extends StampBase {
// Assumes `hexHash` is a hexadecimal value.
override def writeStamp: String = s"hash($hexHash)"
override def getValueId: Int = hexHash.hashCode()
override def getHash: Optional[String] = Optional.of(hexHash)
final class Hash64(val hash: Long) extends StampBase {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to just hold the hash bytes here, and use its hashCode as the hashCode? Not sure which API constraints you're working with, but: converting to 32 is necessary for JVM hashCode methods is necessary, but converting back and forth to Long values just shouldn't (ever) be necessary except in the text based analysis format.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good suggestion 👍. I'll try to see how it goes.

override def writeStamp: String = s"longHash($hash)"
override def getValueId: Int = hash.toInt
override def getBytes: Array[Byte] = Stamp.toByteArray(hash)
override def getHash: Optional[String] = Optional.empty[String]
override def getLastModified: Optional[BoxedLong] = Optional.empty[BoxedLong]
override def getHash64: Optional[BoxedLong] = Optional.of(hash)
}

private[sbt] object Hash {
private val Pattern = """hash\(((?:[0-9a-fA-F][0-9a-fA-F])+)\)""".r

def ofFile(f: File): Hash =
new Hash(IOHash toHex IOHash(f)) // assume toHex returns a hex string

def fromString(s: String): Option[Hash] = {
val m = Pattern.pattern matcher s
if (m.matches()) Some(new Hash(m group 1))
else None
}

object FromString {
def unapply(s: String): Option[Hash] = fromString(s)
}

def unsafeFromString(s: String): Hash = new Hash(s)
private[sbt] object Hash64 {
final val Pattern = """longHash\((\d+)\)""".r
}

/** Define the last modified time of the file. It's a typical stamp for class files and products. */
final class LastModified(val value: Long) extends StampBase {
override def writeStamp: String = s"lastModified(${value})"
override def getValueId: Int = (value ^ (value >>> 32)).toInt
override def getValueId: Int = value.toInt
override def getBytes: Array[Byte] = Stamp.toByteArray(value)
override def getHash: Optional[String] = Optional.empty[String]
override def getLastModified: Optional[BoxedLong] = Optional.of(value)
override def getHash64: Optional[BoxedLong] = Optional.empty[BoxedLong]
}

/** Defines an empty stamp. */
private[sbt] object EmptyStamp extends StampBase {
// Use `absent` because of historic reasons -- replacement of old `Exists` representation
final val Value = "absent"
private[this] final val underlyingHash = System.identityHashCode(this)
override def writeStamp: String = Value
override def getValueId: Int = System.identityHashCode(this)
override def getValueId: Int = underlyingHash
override def getBytes: Array[Byte] = Stamp.toByteArray(underlyingHash)
override def getHash: Optional[String] = Optional.empty[String]
override def getLastModified: Optional[BoxedLong] = Optional.empty[BoxedLong]
override def getHash64: Optional[BoxedLong] = Optional.empty[BoxedLong]
}

private[inc] object LastModified extends WithPattern {
final val Pattern = """lastModified\((\d+)\)""".r
}

object Stamp {
def toByteArray(int: Int): Array[Byte] = {
val buffer = ByteBuffer.allocate(BoxedInt.BYTES)
buffer.putInt(int)
buffer.array()
}

def toByteArray(long: Long): Array[Byte] = {
val buffer = ByteBuffer.allocate(BoxedLong.BYTES)
buffer.putLong(long)
buffer.array()
}

private final val maxModificationDifferenceInMillis = 100L
implicit val equivStamp: Equiv[Stamp] = new Equiv[Stamp] {
def equiv(a: Stamp, b: Stamp) = (a, b) match {
case (h1: Hash, h2: Hash) => h1.hexHash == h2.hexHash
case (h1: Hash64, h2: Hash64) => h1.hash == h2.hash
// Windows is handling this differently sometimes...
case (lm1: LastModified, lm2: LastModified) =>
lm1.value == lm2.value ||
Expand All @@ -126,8 +129,8 @@ object Stamp {

def fromString(s: String): Stamp = s match {
case EmptyStamp.Value => EmptyStamp
case Hash.FromString(hash) => hash
case LastModified.Pattern(value) => new LastModified(java.lang.Long.parseLong(value))
case Hash64.Pattern(value) => new Hash64(BoxedLong.parseLong(value))
case LastModified.Pattern(value) => new LastModified(BoxedLong.parseLong(value))
case _ =>
throw new IllegalArgumentException("Unrecognized Stamp string representation: " + s)
}
Expand All @@ -141,8 +144,30 @@ object Stamper {
catch { case i: IOException => EmptyStamp }
}

val forHash = (toStamp: File) => tryStamp(Hash.ofFile(toStamp))
val forLastModified = (toStamp: File) => tryStamp(new LastModified(toStamp.lastModified()))
private final val hashFactory = XXHashFactory.fastestInstance()
private final val seed = 0x9747b28c
private def hashFile(toStamp: File): Stamp = {
import java.nio.channels.FileChannel
if (!toStamp.exists() || toStamp.isDirectory) EmptyStamp
else {
val hashFunction = hashFactory.hash64()
var randomFile: RandomAccessFile = null
var channel: FileChannel = null
try {
randomFile = new RandomAccessFile(toStamp, "r")
channel = randomFile.getChannel
val mappedChannel = channel.map(MapMode.READ_ONLY, 0, channel.size())
val hash = hashFunction.hash(mappedChannel.asReadOnlyBuffer(), seed)
new Hash64(hash)
} finally {
if (randomFile != null) randomFile.close()
if (channel != null) channel.close()
}
}
}

final val forHash: (File => Stamp) = hashFile _
final val forLastModified = (toStamp: File) => tryStamp(new LastModified(toStamp.lastModified()))
}

object Stamps {
Expand Down
16 changes: 0 additions & 16 deletions internal/zinc-core/src/test/scala/sbt/internal/inc/HashSpec.scala

This file was deleted.

Loading