Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't eagerly flatten a NestedSet in RepoMappingManifestAction #18349

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:commandline_item",
"//src/main/java/com/google/devtools/build/lib/cmdline",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/packages",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/net/starlark/java/eval",
"//third_party:auto_value",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,61 +13,67 @@
// limitations under the License.
package com.google.devtools.build.lib.analysis;

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.ImmutableSortedMap.toImmutableSortedMap;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.util.Comparator.comparing;

import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
import com.google.devtools.build.lib.actions.ActionKeyContext;
import com.google.devtools.build.lib.actions.ActionOwner;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander;
import com.google.devtools.build.lib.actions.CommandLineExpansionException;
import com.google.devtools.build.lib.actions.CommandLineItem.MapFn;
import com.google.devtools.build.lib.actions.ExecException;
import com.google.devtools.build.lib.analysis.actions.AbstractFileWriteAction;
import com.google.devtools.build.lib.analysis.actions.DeterministicWriter;
import com.google.devtools.build.lib.cmdline.RepositoryMapping;
import com.google.devtools.build.lib.cmdline.RepositoryName;
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.packages.Package;
import com.google.devtools.build.lib.util.Fingerprint;
import java.io.PrintWriter;
import java.util.List;
import java.util.Map.Entry;
import java.util.UUID;
import javax.annotation.Nullable;
import net.starlark.java.eval.EvalException;

/** Creates a manifest file describing the repos and mappings relevant for a runfile tree. */
public class RepoMappingManifestAction extends AbstractFileWriteAction {
private static final UUID MY_UUID = UUID.fromString("458e351c-4d30-433d-b927-da6cddd4737f");

private final ImmutableList<Entry> entries;
private final String workspaceName;
public final class RepoMappingManifestAction extends AbstractFileWriteAction {

/** An entry in the repo mapping manifest file. */
@AutoValue
public abstract static class Entry {
public static Entry of(
RepositoryName sourceRepo, String targetRepoApparentName, RepositoryName targetRepo) {
return new AutoValue_RepoMappingManifestAction_Entry(
sourceRepo, targetRepoApparentName, targetRepo);
}
private static final UUID MY_UUID = UUID.fromString("458e351c-4d30-433d-b927-da6cddd4737f");

public abstract RepositoryName sourceRepo();
// Uses MapFn's args parameter just like Fingerprint#addString to compute a cacheable fingerprint
// of just the repo name and mapping of a given Package.
private static final MapFn<Package> REPO_AND_MAPPING_DIGEST_FN = (pkg, args) -> {
args.accept(pkg.getPackageIdentifier().getRepository().getName());

public abstract String targetRepoApparentName();
var mapping = pkg.getRepositoryMapping().entries();
args.accept(String.valueOf(mapping.size()));
mapping.forEach((apparentName, canonicalName) -> {
args.accept(apparentName);
args.accept(canonicalName.getName());
});
};

public abstract RepositoryName targetRepo();
}
private final NestedSet<Package> transitivePackages;
private final NestedSet<Artifact> runfilesArtifacts;
private final String workspaceName;

public RepoMappingManifestAction(
ActionOwner owner, Artifact output, List<Entry> entries, String workspaceName) {
ActionOwner owner,
Artifact output,
NestedSet<Package> transitivePackages,
NestedSet<Artifact> runfilesArtifacts,
String workspaceName) {
super(owner, NestedSetBuilder.emptySet(Order.STABLE_ORDER), output, /*makeExecutable=*/ false);
this.entries =
ImmutableList.sortedCopyOf(
comparing((Entry e) -> e.sourceRepo().getName())
.thenComparing(Entry::targetRepoApparentName),
entries);
this.transitivePackages = transitivePackages;
this.runfilesArtifacts = runfilesArtifacts;
this.workspaceName = workspaceName;
}

Expand All @@ -78,7 +84,7 @@ public String getMnemonic() {

@Override
protected String getRawProgressMessage() {
return "writing repo mapping manifest for " + getOwner().getLabel();
return "Writing repo mapping manifest for " + getOwner().getLabel();
}

@Override
Expand All @@ -88,35 +94,60 @@ protected void computeKey(
Fingerprint fp)
throws CommandLineExpansionException, EvalException, InterruptedException {
fp.addUUID(MY_UUID);
actionKeyContext.addNestedSetToFingerprint(REPO_AND_MAPPING_DIGEST_FN, fp, transitivePackages);
actionKeyContext.addNestedSetToFingerprint(fp, runfilesArtifacts);
fp.addString(workspaceName);
for (Entry entry : entries) {
fp.addString(entry.sourceRepo().getName());
fp.addString(entry.targetRepoApparentName());
fp.addString(entry.targetRepo().getName());
}
}

@Override
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx)
throws InterruptedException, ExecException {
return out -> {
PrintWriter writer = new PrintWriter(out, /*autoFlush=*/ false, ISO_8859_1);
for (Entry entry : entries) {
if (entry.targetRepoApparentName().isEmpty()) {
// The apparent repo name can only be empty for the main repo. We skip this line as
// Rlocation paths can't reference an empty apparent name anyway.
continue;
}
// The canonical name of the main repo is the empty string, which is not a valid name for a
// directory, so the "workspace name" is used as the name of the directory under the runfiles
// tree for it.
String targetRepoDirectoryName =
entry.targetRepo().isMain() ? workspaceName : entry.targetRepo().getName();
writer.format(
"%s,%s,%s\n",
entry.sourceRepo().getName(), entry.targetRepoApparentName(), targetRepoDirectoryName);
}

ImmutableSet<RepositoryName> reposContributingRunfiles =
runfilesArtifacts.toList().stream()
.filter(a -> a.getOwner() != null)
.map(a -> a.getOwner().getRepository())
.collect(toImmutableSet());
transitivePackages.toList()
.stream()
.collect(toImmutableSortedMap(
comparing(RepositoryName::getName),
pkg -> pkg.getPackageIdentifier().getRepository(),
Package::getRepositoryMapping,
// All packages in a given repository have the same repository mapping, so the
// particular way of resolving duplicates does not matter.
(first, second) -> first))
.forEach((repoName, mapping) ->
writeRepoMapping(writer, reposContributingRunfiles, repoName, mapping));
writer.flush();
};
}

/**
 * Writes the manifest lines for the repository mapping of a single source repository.
 *
 * <p>Entries are emitted in the natural (sorted) order of their apparent names. Each line has the
 * form {@code <source repo name>,<apparent name>,<target repo directory name>}.
 *
 * @param writer destination the manifest lines are appended to
 * @param reposContributingRunfiles repositories whose mapping entries should be written; entries
 *     pointing at any other repository are omitted
 * @param repoName the repository whose mapping is being written
 * @param repoMapping the mapping from apparent names to repositories as seen from {@code repoName}
 */
private void writeRepoMapping(
    PrintWriter writer,
    ImmutableSet<RepositoryName> reposContributingRunfiles,
    RepositoryName repoName,
    RepositoryMapping repoMapping) {
  ImmutableSortedMap<String, RepositoryName> sortedMapping =
      ImmutableSortedMap.copyOf(repoMapping.entries());
  sortedMapping.forEach(
      (apparentName, targetRepo) -> {
        // An empty apparent name can only occur for the main repo, and Rlocation paths can't
        // reference an empty apparent name anyway. Beyond that, only repos that actually
        // contribute runfiles get a line in the manifest.
        if (apparentName.isEmpty() || !reposContributingRunfiles.contains(targetRepo)) {
          return;
        }
        // The canonical name of the main repo is the empty string, which is not a valid name for
        // a directory, so the "workspace name" is used as the name of the directory under the
        // runfiles tree for it.
        String targetRepoDirectoryName;
        if (targetRepo.isMain()) {
          targetRepoDirectoryName = workspaceName;
        } else {
          targetRepoDirectoryName = targetRepo.getName();
        }
        writer.format("%s,%s,%s\n", repoName.getName(), apparentName, targetRepoDirectoryName);
      });
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@

package com.google.devtools.build.lib.analysis;

import static com.google.common.collect.ImmutableSet.toImmutableSet;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
Expand All @@ -25,25 +23,21 @@
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.CommandLine;
import com.google.devtools.build.lib.actions.RunfilesSupplier;
import com.google.devtools.build.lib.analysis.RepoMappingManifestAction.Entry;
import com.google.devtools.build.lib.analysis.SourceManifestAction.ManifestType;
import com.google.devtools.build.lib.analysis.actions.ActionConstructionContext;
import com.google.devtools.build.lib.analysis.actions.SymlinkTreeAction;
import com.google.devtools.build.lib.analysis.config.BuildConfigurationValue;
import com.google.devtools.build.lib.analysis.config.RunUnder;
import com.google.devtools.build.lib.cmdline.RepositoryName;
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable;
import com.google.devtools.build.lib.packages.Package;
import com.google.devtools.build.lib.packages.TargetUtils;
import com.google.devtools.build.lib.packages.Type;
import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -558,48 +552,12 @@ private static Artifact createRepoMappingManifestAction(
new RepoMappingManifestAction(
ruleContext.getActionOwner(),
repoMappingManifest,
collectRepoMappings(
Preconditions.checkNotNull(
ruleContext.getTransitivePackagesForRunfileRepoMappingManifest()),
runfiles),
ruleContext.getTransitivePackagesForRunfileRepoMappingManifest(),
runfiles.getAllArtifacts(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: getAllArtifacts() does a partial flattening -- symlinks and root symlinks are still flattened. But those are usually small, so still a big win.

Copy link
Collaborator Author

@fmeum fmeum May 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it would be worth improving that further? Can Runfiles be serialized? Then that could make for a better action constructor argument.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think so? SourceManifestAction, for example, accepts a Runfiles object directly.

I wouldn't expect a large gain in practice. The symlinks and root_symlinks are usually small. Runfiles.getAllArtifacts() is called in a few different places already.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your comment, it led me to discover that the repo mapping manifest didn't handle symlinks correctly: #18381

ruleContext.getWorkspaceName()));
return repoMappingManifest;
}

/**
 * Gathers the entries (in no particular order) that should appear in the repo mapping manifest.
 *
 * <p>For the first package encountered from each repository, every entry of that package's
 * repository mapping whose target repository owns at least one runfiles artifact is recorded.
 */
private static ImmutableList<Entry> collectRepoMappings(
    NestedSet<Package> transitivePackages, Runfiles runfiles) {
  // NOTE: It might appear that the flattening of `transitivePackages` is better suited to the
  // execution phase rather than here in the analysis phase, but we can't do that since it would
  // necessitate storing `transitivePackages` in an action, which breaks skyframe serialization
  // since packages cannot be serialized here.

  // Repositories that own at least one runfiles artifact; artifacts without an owner are ignored.
  ImmutableSet<RepositoryName> runfilesRepos =
      runfiles.getAllArtifacts().toList().stream()
          .filter(artifact -> artifact.getOwner() != null)
          .map(artifact -> artifact.getOwner().getRepository())
          .collect(toImmutableSet());

  ImmutableList.Builder<Entry> manifestEntries = ImmutableList.builder();
  Set<RepositoryName> visitedRepos = new HashSet<>();
  for (Package pkg : transitivePackages.toList()) {
    RepositoryName sourceRepo = pkg.getPackageIdentifier().getRepository();
    // Any package from the same repo would have the same repo mapping, so only the first package
    // seen for each repo is processed.
    if (visitedRepos.add(sourceRepo)) {
      pkg.getRepositoryMapping()
          .entries()
          .forEach(
              (apparentName, targetRepo) -> {
                if (runfilesRepos.contains(targetRepo)) {
                  manifestEntries.add(Entry.of(sourceRepo, apparentName, targetRepo));
                }
              });
    }
  }
  return manifestEntries.build();
}

@Override
public NestedSet<Artifact> getArtifacts() {
return runfiles.getArtifacts();
Expand Down
3 changes: 3 additions & 0 deletions src/test/java/com/google/devtools/build/lib/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -368,16 +368,19 @@ java_test(
srcs = ["RunfilesRepoMappingManifestTest.java"],
deps = [
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/actions:commandline_item",
"//src/main/java/com/google/devtools/build/lib/analysis:blaze_directories",
"//src/main/java/com/google/devtools/build/lib/analysis:repo_mapping_manifest_action",
"//src/main/java/com/google/devtools/build/lib/bazel/bzlmod:resolution",
"//src/main/java/com/google/devtools/build/lib/bazel/bzlmod:resolution_impl",
"//src/main/java/com/google/devtools/build/lib/bazel/repository:repository_options",
"//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:sky_functions",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/skyframe",
"//src/main/java/com/google/devtools/build/skyframe:skyframe-objects",
"//src/main/java/net/starlark/java/eval",
"//src/test/java/com/google/devtools/build/lib/analysis/util",
"//src/test/java/com/google/devtools/build/lib/bazel/bzlmod:util",
"//third_party:guava",
Expand Down
Loading