Skip to content

Commit

Permalink
UnixFileSystem: read cached hashes from extended attributes
Browse files Browse the repository at this point in the history
There are certain workloads where Bazel's running time gets dominated by
checksum computation. Examples include:

- People adding local_repository()s to their project that point to
  networked file shares.
- The use of repositories that contain very large input files.

When using remote execution, we need to compute digests to be able to
place such files in input roots. In many cases, a centralized CAS will
already contain these files. It would be nice if Bazel could efficiently
check for existence of such objects without needing to scan the file
locally.

This change extends UnixFileSystem to call getxattr() on attribute
~~"user.checksum.${algo}" prior to falling back to reading file contents.~~
~~There is no true standard on how these extended attributes should be~~
~~called, but "user.checksum.${algo}" already has some precedent. It is,~~
~~for example, used by BuildGrid internally:~~

~~https://gitlab.com/BuildGrid/buildbox/buildbox-fuse/-/merge_requests/9~~

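**EDIT:** The name of the extended attribute is now configurable through the
`--unix_digest_hash_attribute_name` startup option; when the option is left
empty, no extended attribute is consulted.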

Using extended attributes to store this information also seems to be a
fairly common approach. Apparently it is also used within Google itself:

https://groups.google.com/g/bazel-discuss/c/6VmjSOLySnY/m/v2dpwt8jBgAJ

No code has been added yet to let Bazel write these attributes to disk.
For now, the main goal is to speed up access to read-only corpora whose
maintainers have already spent the effort of adding these attributes.

Closes bazelbuild#11662.

(@janakdr made some modifications from the original pull request, mainly to
deal with merge conflicts and address Google-internal style.)

PiperOrigin-RevId: 332256967
  • Loading branch information
EdSchouten authored and Yannic committed Oct 5, 2020
1 parent c192a6b commit ebc324f
Show file tree
Hide file tree
Showing 27 changed files with 331 additions and 25 deletions.
4 changes: 4 additions & 0 deletions src/main/cpp/blaze.cc
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,10 @@ static vector<string> GetServerExeArgs(const blaze_util::Path &jvm_path,
// being "null" to set the programmatic default in the server.
result.push_back("--digest_function=" + startup_options.digest_function);
}
if (!startup_options.unix_digest_hash_attribute_name.empty()) {
result.push_back("--unix_digest_hash_attribute_name=" +
startup_options.unix_digest_hash_attribute_name);
}
if (startup_options.idle_server_tasks) {
result.push_back("--idle_server_tasks");
} else {
Expand Down
6 changes: 6 additions & 0 deletions src/main/cpp/startup_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ StartupOptions::StartupOptions(const string &product_name,
RegisterUnaryStartupFlag("command_port");
RegisterUnaryStartupFlag("connect_timeout_secs");
RegisterUnaryStartupFlag("digest_function");
RegisterUnaryStartupFlag("unix_digest_hash_attribute_name");
RegisterUnaryStartupFlag("server_javabase");
RegisterUnaryStartupFlag("host_jvm_args");
RegisterUnaryStartupFlag("host_jvm_profile");
Expand Down Expand Up @@ -359,6 +360,11 @@ blaze_exit_code::ExitCode StartupOptions::ProcessArg(
NULL) {
digest_function = value;
option_sources["digest_function"] = rcfile;
} else if ((value = GetUnaryOption(arg, next_arg,
"--unix_digest_hash_attribute_name")) !=
NULL) {
unix_digest_hash_attribute_name = value;
option_sources["unix_digest_hash_attribute_name"] = rcfile;
} else if ((value = GetUnaryOption(arg, next_arg, "--command_port")) !=
NULL) {
if (!blaze_util::safe_strto32(value, &command_port) ||
Expand Down
2 changes: 2 additions & 0 deletions src/main/cpp/startup_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ class StartupOptions {
// The hash function to use when computing file digests.
std::string digest_function;

// The value of --unix_digest_hash_attribute_name, i.e. the name of the
// extended attribute holding a precomputed file digest. When non-empty it is
// forwarded to the server as --unix_digest_hash_attribute_name=<value>.
std::string unix_digest_hash_attribute_name;

bool idle_server_tasks;

// The startup options as received from the user and rc files, tagged with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,6 @@ public ModuleFileSystem getFileSystem(
return ModuleFileSystem.create(
OS.getCurrent() == OS.WINDOWS
? new WindowsFileSystem(digestHashFunction, enableSymLinks)
: new UnixFileSystem(digestHashFunction));
: new UnixFileSystem(digestHashFunction, options.unixDigestHashAttributeName));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -482,4 +482,17 @@ public String getTypeDescription() {
+ "Requires Windows developer mode to be enabled and Windows 10 version 1703 or "
+ "greater.")
public boolean enableWindowsSymlinks;

// Name of the extended attribute from which precomputed file digests are read. Defaults to the
// empty string.
// NOTE(review): an empty name appears to disable the lookup entirely, since
// UnixFileSystem#getFastDigest returns null without calling getxattr() in that case - confirm
// that this is the only consumer of the option.
@Option(
name = "unix_digest_hash_attribute_name",
defaultValue = "",
documentationCategory = OptionDocumentationCategory.UNDOCUMENTED,
effectTags = {OptionEffectTag.CHANGES_INPUTS, OptionEffectTag.LOSES_INCREMENTAL_STATE},
help =
"The name of an extended attribute that can be placed on files to store a precomputed "
+ "copy of the file's hash, corresponding with --digest_function. This option "
+ "can be used to reduce disk I/O and CPU load caused by hash computation. This "
+ "extended attribute is checked on all source files and output files, meaning "
+ "that it causes a significant number of invocations of the getxattr() system call.")
public String unixDigestHashAttributeName;
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,11 @@
*/
@ThreadSafe
public class UnixFileSystem extends AbstractFileSystemWithCustomStat {
public UnixFileSystem(DigestHashFunction hashFunction) {
protected final String hashAttributeName;

public UnixFileSystem(DigestHashFunction hashFunction, String hashAttributeName) {
super(hashFunction);
this.hashAttributeName = hashAttributeName;
}

/**
Expand Down Expand Up @@ -403,6 +406,13 @@ public byte[] getxattr(Path path, String name, boolean followSymlinks) throws IO
}
}

/**
 * Looks up a precomputed digest for {@code path} in the extended attribute named by {@code
 * hashAttributeName}, so that the file's contents need not be hashed again.
 *
 * @return {@code null} when no attribute name is configured; otherwise whatever {@code getxattr}
 *     yields for the configured attribute
 * @throws IOException if the underlying {@code getxattr} call throws
 */
@Override
protected byte[] getFastDigest(Path path) throws IOException {
  if (hashAttributeName.isEmpty()) {
    // No attribute name configured: skip the getxattr() call altogether.
    return null;
  }
  // Symlinks are followed, so the attribute is read from the file the path resolves to.
  return getxattr(path, hashAttributeName, /* followSymlinks= */ true);
}

@Override
protected byte[] getDigest(Path path) throws IOException {
String name = path.toString();
Expand Down
3 changes: 3 additions & 0 deletions src/test/java/com/google/devtools/build/lib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ java_test(
test_class = "com.google.devtools.build.lib.AllTests",
deps = [
":AllTests",
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata",
"//src/main/java/com/google/devtools/build/lib/unix",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/util:os",
Expand All @@ -211,6 +213,7 @@ java_test(
"//src/test/java/com/google/devtools/build/lib/testutil:JunitUtils",
"//src/test/java/com/google/devtools/build/lib/testutil:TestUtils",
"//src/test/java/com/google/devtools/build/lib/vfs:SymlinkAwareFileSystemTest",
"//src/test/java/com/google/devtools/build/lib/vfs:testutil",
"//src/test/java/com/google/devtools/build/lib/vfs/util",
"//third_party:guava",
"//third_party:guava-testlib",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ DecompressorDescriptor.Builder createDescriptorBuilder() throws IOException {
FileSystem testFS =
OS.getCurrent() == OS.WINDOWS
? new JavaIoFileSystem(DigestHashFunction.SHA256)
: new UnixFileSystem(DigestHashFunction.SHA256);
: new UnixFileSystem(DigestHashFunction.SHA256, /*hashAttributeName=*/ "");

// do not rely on TestConstants.JAVATESTS_ROOT end with slash, but ensure separators
// are not duplicated
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public class LastBuildEventTest {

@Test
public void testForwardsReferencedLocalFilesCall() {
FileSystem fs = new UnixFileSystem(DigestHashFunction.SHA256);
FileSystem fs = new UnixFileSystem(DigestHashFunction.SHA256, /*hashAttributeName=*/ "");
LocalFile localFile = new LocalFile(fs.getPath("/some/file"), LocalFileType.FAILED_TEST_OUTPUT);
LastBuildEvent event = new LastBuildEvent(new BuildEvent() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ private static class CustomRealFilesystem extends UnixFileSystem {
private final Set<String> createDirectoryErrorNames = new HashSet<>();

private CustomRealFilesystem() {
super(DigestHashFunction.SHA256);
super(DigestHashFunction.SHA256, /*hashAttributeName=*/ "");
}

void alwaysError(Path path) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ protected boolean realFileSystem() {

@Override
protected FileSystem createFileSystem() {
return new UnixFileSystem(DigestHashFunction.SHA256) {
return new UnixFileSystem(DigestHashFunction.SHA256, /*hashAttributeName=*/ "") {
boolean threwException = false;

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ protected void setListener(FileListener listener) {
@Override
protected FileSystem createFileSystem() {
setListener(DUMMY_LISTENER);
return new UnixFileSystem(GoogleHashFunctions.PSHA2) {
return new UnixFileSystem(GoogleHashFunctions.PSHA2, /*hashAttributeName=*/ "") {
@Override
protected void chmod(Path path, int chmod) throws IOException {
listener.get().handle(PathOp.CHMOD, path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@ protected boolean realFileSystem() {

@Override
protected FileSystem createFileSystem() {
return new UnixFileSystem(DigestHashFunction.SHA256) {

return new UnixFileSystem(DigestHashFunction.SHA256, /*hashAttributeName=*/ "") {
private void recordAccess(PathOp op, Path path) {
if (receiver != null) {
receiver.accept(path, op);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -884,7 +884,7 @@ public void hasExecutionStatistics_whenOptionIsEnabled() throws Exception {
// TODO(b/62588075) Currently no process-wrapper or execution statistics support in Windows.
assumeTrue(OS.getCurrent() != OS.WINDOWS);

FileSystem fs = new UnixFileSystem(DigestHashFunction.SHA256);
FileSystem fs = new UnixFileSystem(DigestHashFunction.SHA256, /*hashAttributeName=*/ "");

LocalExecutionOptions options = Options.getDefaults(LocalExecutionOptions.class);
options.collectLocalExecutionStatistics = true;
Expand Down Expand Up @@ -956,7 +956,7 @@ public void hasNoExecutionStatistics_whenOptionIsDisabled() throws Exception {
// TODO(b/62588075) Currently no process-wrapper or execution statistics support in Windows.
assumeTrue(OS.getCurrent() != OS.WINDOWS);

FileSystem fs = new UnixFileSystem(DigestHashFunction.SHA256);
FileSystem fs = new UnixFileSystem(DigestHashFunction.SHA256, /*hashAttributeName=*/ "");

LocalExecutionOptions options = Options.getDefaults(LocalExecutionOptions.class);
options.collectLocalExecutionStatistics = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ public final class CommandUsingLinuxSandboxTest {
private Path runfilesDir;

@Before
public final void createFileSystem() throws Exception {
testFS = new UnixFileSystem(DigestHashFunction.SHA256);
public final void createFileSystem() {
testFS = new UnixFileSystem(DigestHashFunction.SHA256, /*hashAttributeName=*/ "");
runfilesDir = testFS.getPath(BlazeTestUtils.runfilesDir());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ public final class CommandUsingProcessWrapperTest {
private FileSystem testFS;

@Before
public final void createFileSystem() throws Exception {
testFS = new UnixFileSystem(DigestHashFunction.SHA256);
public final void createFileSystem() {
testFS = new UnixFileSystem(DigestHashFunction.SHA256, /*hashAttributeName=*/ "");
}

private ProcessWrapper getProcessWrapper() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ private TestConstants() {
"com.google.devtools.build.lib.bazel.rules.BazelStrategyModule";
public static final String TEST_REAL_UNIX_FILE_SYSTEM =
"com.google.devtools.build.lib.unix.UnixFileSystem";
public static final String TEST_UNIX_HASH_ATTRIBUTE = "";

public static final ImmutableList<String> IGNORED_MESSAGE_PREFIXES = ImmutableList.<String>of();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@
/** Tests for the {@link NativePosixFiles} class. */
@RunWith(JUnit4.class)
public class NativePosixFilesTest {
private FileSystem testFS;

private Path workingDir;
private Path testFile;

@Before
public final void createFileSystem() throws Exception {
testFS = new UnixFileSystem(DigestHashFunction.SHA256);
FileSystem testFS = new UnixFileSystem(DigestHashFunction.SHA256, /*hashAttributeName=*/ "");
workingDir = testFS.getPath(new File(TestUtils.tmpDir()).getCanonicalPath());
testFile = workingDir.getRelative("test");
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.unix;

import static com.google.common.truth.Truth.assertThat;

import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.DigestUtils;
import com.google.devtools.build.lib.vfs.FileSystem;
import com.google.devtools.build.lib.vfs.FileSystemTest;
import com.google.devtools.build.lib.vfs.Path;
import java.io.IOException;
import org.junit.Test;

/**
 * Checks that {@link com.google.devtools.build.lib.unix.UnixFileSystem#getFastDigest} serves
 * digests from the configured extended attribute.
 */
public class UnixDigestHashAttributeNameTest extends FileSystemTest {
  /** Extended attribute name the file system under test is configured with. */
  private static final String HASH_ATTRIBUTE_NAME = "user.checksum.sha256";

  /** Arbitrary 32-byte value handed back by the intercepted getxattr() call. */
  private static final byte[] FAKE_DIGEST = {
    0x18, 0x5f, 0x3d, 0x33, 0x22, 0x71, 0x7e, 0x25,
    0x55, 0x61, 0x26, 0x0c, 0x03, 0x6b, 0x2e, 0x26,
    0x43, 0x06, 0x7c, 0x30, 0x4e, 0x3a, 0x51, 0x20,
    0x07, 0x71, 0x76, 0x48, 0x26, 0x38, 0x19, 0x69,
  };

  /** UnixFileSystem whose getxattr() is stubbed out to verify its arguments and return a fake. */
  private class FakeAttributeFileSystem extends UnixFileSystem {
    public FakeAttributeFileSystem(DigestHashFunction hashFunction) {
      super(hashFunction, HASH_ATTRIBUTE_NAME);
    }

    @Override
    public byte[] getxattr(Path path, String name, boolean followSymlinks) throws IOException {
      // The lookup must target the requested file, use the configured attribute name, and
      // follow symlinks.
      assertThat(path).isEqualTo(absolutize("myfile"));
      assertThat(name).isEqualTo(HASH_ATTRIBUTE_NAME);
      assertThat(followSymlinks).isTrue();
      return FAKE_DIGEST;
    }
  }

  @Override
  protected FileSystem getFreshFileSystem(DigestHashFunction digestHashFunction) {
    return new FakeAttributeFileSystem(digestHashFunction);
  }

  @Test
  public void testFoo() throws Exception {
    // "myfile" is never created. The digest request is expected to be answered by the stubbed
    // getxattr() above, so the returned bytes must be exactly the fake attribute value.
    assertThat(DigestUtils.getDigestWithManualFallback(absolutize("myfile"), 123))
        .isEqualTo(FAKE_DIGEST);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class UnixFileSystemTest extends SymlinkAwareFileSystemTest {

@Override
protected FileSystem getFreshFileSystem(DigestHashFunction digestHashFunction) {
return new UnixFileSystem(digestHashFunction);
return new UnixFileSystem(digestHashFunction, /*hashAttributeName=*/ "");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ public class UnixPathEqualityTest {
private FileSystem unixFs;

@Before
public final void initializeFileSystem() throws Exception {
unixFs = new UnixFileSystem(DigestHashFunction.SHA256);
otherUnixFs = new UnixFileSystem(DigestHashFunction.SHA256);
public final void initializeFileSystem() {
unixFs = new UnixFileSystem(DigestHashFunction.SHA256, /*hashAttributeName=*/ "");
otherUnixFs = new UnixFileSystem(DigestHashFunction.SHA256, /*hashAttributeName=*/ "");
assertThat(unixFs != otherUnixFs).isTrue();
}

Expand Down
18 changes: 16 additions & 2 deletions src/test/java/com/google/devtools/build/lib/vfs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,14 @@ java_library(
],
exclude = ALL_WINDOWS_TESTS + [
"SymlinkAwareFileSystemTest.java",
"FileSystemTest.java",
],
),
deps = [
":SymlinkAwareFileSystemTest",
"//src/main/java/com/google/devtools/build/lib/clock",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/testutils",
"//src/main/java/com/google/devtools/build/lib/unix",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs",
Expand All @@ -75,6 +74,21 @@ java_library(
],
)

# Library target that compiles FileSystemTest.java on its own.
# NOTE(review): presumably split out so that test classes extending FileSystemTest can
# depend on it by label - confirm against the targets that reference it.
java_library(
name = "FileSystemTest_lib",
srcs = ["FileSystemTest.java"],
deps = [
"//src/main/java/com/google/devtools/build/lib/unix",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//src/test/java/com/google/devtools/build/lib/testutil:TestUtils",
"//third_party:guava",
"//third_party:junit4",
"//third_party:truth",
],
)

java_test(
name = "VfsTests",
size = "large",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ public static FileSystem getNativeFileSystem(DigestHashFunction digestHashFuncti
try {
return Class.forName(TestConstants.TEST_REAL_UNIX_FILE_SYSTEM)
.asSubclass(FileSystem.class)
.getDeclaredConstructor(DigestHashFunction.class)
.newInstance(digestHashFunction);
.getDeclaredConstructor(DigestHashFunction.class, String.class)
.newInstance(digestHashFunction, TestConstants.TEST_UNIX_HASH_ATTRIBUTE);
} catch (Exception e) {
throw new IllegalStateException(e);
}
Expand Down
8 changes: 8 additions & 0 deletions src/test/shell/bazel/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1319,3 +1319,11 @@ sh_test(
data = [":test-deps"],
deps = ["@bazel_tools//tools/bash/runfiles"],
)

# Shell test driven by unix_digest_hash_attribute_name_test.sh.
# Tagged "no_windows"; presumably because the option under test only affects
# UnixFileSystem - confirm.
sh_test(
name = "unix_digest_hash_attribute_name_test",
srcs = ["unix_digest_hash_attribute_name_test.sh"],
data = [":test-deps"],
tags = ["no_windows"],
deps = ["@bazel_tools//tools/bash/runfiles"],
)
13 changes: 13 additions & 0 deletions src/test/shell/bazel/remote/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,16 @@ sh_test(
"@bazel_tools//tools/bash/runfiles",
],
)

# Shell test driven by remote_execution_with_xattr_test.sh. Its runtime data includes the
# //src/tools/remote:worker target and the local :remote_utils helpers.
# NOTE(review): size "large" with an "eternal" timeout is presumably needed because the
# test runs against that worker - confirm.
sh_test(
name = "remote_execution_with_xattr_test",
size = "large",
timeout = "eternal",
srcs = ["remote_execution_with_xattr_test.sh"],
data = [
":remote_utils",
"//src/test/shell/bazel:test-deps",
"//src/tools/remote:worker",
"@bazel_tools//tools/bash/runfiles",
],
)
Loading

0 comments on commit ebc324f

Please sign in to comment.