Skip to content

Commit

Permalink
Construct TreeArtifactValues on multiple threads.
Browse files Browse the repository at this point in the history
This makes it possible to use every core available for checksumming, which
makes a huge difference for large tree artifacts.

Fixes #17009.

RELNOTES: None.
PiperOrigin-RevId: 525085502
Change-Id: I2a995d3445940333c21eeb89b4ba60887f99e51b
  • Loading branch information
lberki authored and copybara-github committed Apr 18, 2023
1 parent 91584dd commit 368bf11
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,10 @@ private TreeArtifactValue constructTreeArtifactValueFromFilesystem(SpecialArtifa
throw new IOException(errorMessage, e);
}

tree.putChild(child, metadata);
// visitTree() uses multiple threads and putChild() is not thread-safe
synchronized (tree) {
tree.putChild(child, metadata);
}
});

if (archivedTreeArtifactsEnabled) {
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/skyframe/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2893,6 +2893,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata",
"//src/main/java/com/google/devtools/build/lib/actions:has_digest",
"//src/main/java/com/google/devtools/build/lib/concurrent",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec:serialization-constant",
"//src/main/java/com/google/devtools/build/lib/util",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,8 @@ private boolean treeArtifactIsDirty(Artifact artifact, TreeArtifactValue value)

// This could be improved by short-circuiting as soon as we see a child that is not present in
// the TreeArtifactValue, but it doesn't seem to be a major source of overhead.
Set<PathFragment> currentChildren = new HashSet<>();
// visitTree() is called from multiple threads in parallel so this need to be a hash set
Set<PathFragment> currentChildren = Sets.newConcurrentHashSet();
try {
TreeArtifactValue.visitTree(
path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,15 @@
import com.google.devtools.build.lib.actions.FileStateType;
import com.google.devtools.build.lib.actions.HasDigest;
import com.google.devtools.build.lib.actions.cache.MetadataDigestUtils;
import com.google.devtools.build.lib.concurrent.AbstractQueueVisitor;
import com.google.devtools.build.lib.concurrent.ErrorClassifier;
import com.google.devtools.build.lib.concurrent.NamedForkJoinPool;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec;
import com.google.devtools.build.lib.skyframe.serialization.autocodec.SerializationConstant;
import com.google.devtools.build.lib.util.Fingerprint;
import com.google.devtools.build.lib.vfs.Dirent;
import com.google.devtools.build.lib.vfs.IORuntimeException;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.Symlinks;
Expand All @@ -51,14 +56,17 @@
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.ForkJoinPool;
import javax.annotation.Nullable;

/**
* Value for TreeArtifacts, which contains a digest and the {@link FileArtifactValue}s of its child
* {@link TreeFileArtifact}s.
*/
public class TreeArtifactValue implements HasDigest, SkyValue {

private static final ForkJoinPool VISITOR_POOL =
NamedForkJoinPool.newNamedPool(
"tree-artifact-visitor", Runtime.getRuntime().availableProcessors());
/**
* Comparator based on exec path which works on {@link ActionInput} as opposed to {@link
* com.google.devtools.build.lib.actions.Artifact}. This way, we can use an {@link ActionInput} to
Expand Down Expand Up @@ -486,10 +494,71 @@ public interface TreeArtifactVisitor {
*
* <p>If the implementation throws {@link IOException}, traversal is immediately halted and the
* exception is propagated.
*
* <p>This method can be called from multiple threads in parallel during a single call of {@link
* TreeArtifactVisitor#visitTree(Path, TreeArtifactVisitor)}.
*/
@ThreadSafe
void visit(PathFragment parentRelativePath, Dirent.Type type) throws IOException;
}

/** An {@link AbstractQueueVisitor} that visits every file in the tree artifact. */
static class Visitor extends AbstractQueueVisitor {
private final Path parentDir;
private final TreeArtifactVisitor visitor;

Visitor(Path parentDir, TreeArtifactVisitor visitor) {
super(
VISITOR_POOL,
/* shutdownOnCompletion= */ false,
/* failFastOnException= */ true,
ErrorClassifier.DEFAULT);
this.parentDir = checkNotNull(parentDir);
this.visitor = checkNotNull(visitor);
}

void run() throws IOException, InterruptedException {
execute(() -> visitTree(PathFragment.EMPTY_FRAGMENT));
try {
awaitQuiescence(true);
} catch (IORuntimeException e) {
throw e.getCauseIOException();
}
}

// IOExceptions are wrapped in IORuntimeException so that it can be propagated to the main
// thread
private void visitTree(PathFragment subdir) {
try {
for (Dirent dirent : parentDir.getRelative(subdir).readdir(Symlinks.NOFOLLOW)) {
PathFragment parentRelativePath = subdir.getChild(dirent.getName());
Dirent.Type type = dirent.getType();

if (type == Dirent.Type.UNKNOWN) {
throw new IOException(
"Could not determine type of file for "
+ parentRelativePath
+ " under "
+ parentDir);
}

if (type == Dirent.Type.SYMLINK) {
checkSymlink(subdir, parentDir.getRelative(parentRelativePath));
}

visitor.visit(parentRelativePath, type);

if (type == Dirent.Type.DIRECTORY) {
execute(() -> visitTree(parentRelativePath));
}
}
} catch (IOException e) {
// We can't throw checked exceptions here since AQV expects Runnables
throw new IORuntimeException(e);
}
}
}

/**
* Recursively visits all descendants under a directory.
*
Expand All @@ -501,35 +570,16 @@ public interface TreeArtifactVisitor {
* symlink that traverses outside of the tree artifact and any entry of {@link
* Dirent.Type#UNKNOWN}, such as a named pipe.
*
* <p>The visitor will be called on multiple threads in parallel. Accordingly, it must be
* thread-safe.
*
* @throws IOException if there is any problem reading or validating outputs under the given tree
* artifact directory, or if {@link TreeArtifactVisitor#visit} throws {@link IOException}
*/
public static void visitTree(Path parentDir, TreeArtifactVisitor visitor)
public static void visitTree(Path parentDir, TreeArtifactVisitor treeArtifactVisitor)
throws IOException, InterruptedException {
visitTree(parentDir, PathFragment.EMPTY_FRAGMENT, checkNotNull(visitor));
}

private static void visitTree(Path parentDir, PathFragment subdir, TreeArtifactVisitor visitor)
throws IOException {
for (Dirent dirent : parentDir.getRelative(subdir).readdir(Symlinks.NOFOLLOW)) {
PathFragment parentRelativePath = subdir.getChild(dirent.getName());
Dirent.Type type = dirent.getType();

if (type == Dirent.Type.UNKNOWN) {
throw new IOException(
"Could not determine type of file for " + parentRelativePath + " under " + parentDir);
}

if (type == Dirent.Type.SYMLINK) {
checkSymlink(subdir, parentDir.getRelative(parentRelativePath));
}

visitor.visit(parentRelativePath, type);

if (type == Dirent.Type.DIRECTORY) {
visitTree(parentDir, parentRelativePath, visitor);
}
}
Visitor visitor = new Visitor(parentDir, treeArtifactVisitor);
visitor.run();
}

private static void checkSymlink(PathFragment subDir, Path path) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1393,7 +1393,9 @@ public TreeArtifactValue getTreeArtifactValue(SpecialArtifact output)
Artifact.TreeFileArtifact.createTreeOutput(output, parentRelativePath);
FileArtifactValue metadata =
FileArtifactValue.createForTesting(treeDir.getRelative(parentRelativePath));
tree.putChild(child, metadata);
synchronized (tree) {
tree.putChild(child, metadata);
}
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,13 @@ public void visitTree_visitsEachChild() throws Exception {
scratch.resolve("tree/a/b/dangling_link").createSymbolicLink(PathFragment.create("?"));
List<Pair<PathFragment, Dirent.Type>> children = new ArrayList<>();

TreeArtifactValue.visitTree(treeDir, (child, type) -> children.add(Pair.of(child, type)));
TreeArtifactValue.visitTree(
treeDir,
(child, type) -> {
synchronized (children) {
children.add(Pair.of(child, type));
}
});

assertThat(children)
.containsExactly(
Expand Down Expand Up @@ -435,7 +441,13 @@ public void visitTree_pemitsUpLevelSymlinkInsideTree() throws Exception {
scratch.resolve("tree/a/up_link").createSymbolicLink(PathFragment.create("../file"));
List<Pair<PathFragment, Dirent.Type>> children = new ArrayList<>();

TreeArtifactValue.visitTree(treeDir, (child, type) -> children.add(Pair.of(child, type)));
TreeArtifactValue.visitTree(
treeDir,
(child, type) -> {
synchronized (children) {
children.add(Pair.of(child, type));
}
});

assertThat(children)
.containsExactly(
Expand All @@ -450,7 +462,13 @@ public void visitTree_permitsAbsoluteSymlink() throws Exception {
scratch.resolve("tree/absolute_link").createSymbolicLink(PathFragment.create("/tmp"));
List<Pair<PathFragment, Dirent.Type>> children = new ArrayList<>();

TreeArtifactValue.visitTree(treeDir, (child, type) -> children.add(Pair.of(child, type)));
TreeArtifactValue.visitTree(
treeDir,
(child, type) -> {
synchronized (children) {
children.add(Pair.of(child, type));
}
});

assertThat(children)
.containsExactly(Pair.of(PathFragment.create("absolute_link"), Dirent.Type.SYMLINK));
Expand Down

11 comments on commit 368bf11

@brentleyjones
Copy link
Contributor

@brentleyjones brentleyjones commented on 368bf11 Apr 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we able to cherry-pick this into 6.2 (please 🙏)? cc @tjgq

@brentleyjones
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lberki
Copy link
Contributor Author

@lberki lberki commented on 368bf11 Apr 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cherry-pick wouldn't apply cleanly (as you can see on #17009 , I had to do a lot of prefactoring to make this work), but I could imagine manually cherry-picking this to 6.2 with some collateral damage (essentially, interrupts would be reported as IOExceptions if there is a good reason. How much pain would this save you?

cc @tbaing

@brentleyjones
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lots of pain (the whole iOS community will thank you): #14125 (comment)

@lberki
Copy link
Contributor Author

@lberki lberki commented on 368bf11 Apr 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to cherry-pick this to 6.2 (and the changes it depends on), but the result was a large merge conflict. I haven't checked how easy the merge was because I had like 15 minutes for this, but from a quick glance, it did not look good :(

@lberki
Copy link
Contributor Author

@lberki lberki commented on 368bf11 Apr 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to cherry-pick this to 6.2 again and failed again; there have simply been too many changes since 6.0, especially in the Builds-without-the-Bytes code. However, if you manage to rebase these changes on top of the 6.2 branch (2481bb2 currently):

Or else only 368bf11 by way of wrapping InterrupteException into and IOException, I'd be happy to accept that contribution (it's just that the former approach takes more time I currently have and I don't have the hazmat suit for the latter one)

@brentleyjones
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Will see if we can do that.

@BalestraPatrick
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A first attempt with the latter approach described here: #18194

@lberki
Copy link
Contributor Author

@lberki lberki commented on 368bf11 Apr 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#18194 looks reasonable at first glance.

@lberki
Copy link
Contributor Author

@lberki lberki commented on 368bf11 Apr 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@BalestraPatrick let me know when that pull request is ready for view (for now, I'm ignoring it until you say it is, since you said it's a "first attempt")

@BalestraPatrick
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lberki It's ready for review now, added a comment in the PR showing the key part I modified.

Please sign in to comment.