Skip to content

Commit

Permalink
UnixFileSystem: read cached hashes from extended attributes
Browse files Browse the repository at this point in the history
There are certain workloads where Bazel's running time gets dominated by
checksum computation. Examples include:

- People adding local_repository()s to their project that point to
  networked file shares.
- The use of repositories that contain very large input files.

When using remote execution, we need to compute digests to be able to
place such files in input roots. In many cases, a centralized CAS will
already contain these files. It would be nice if Bazel could efficiently
check for existence of such objects without needing to scan the file
locally.

This change extends UnixFileSystem to call getxattr() on an attribute
prior to falling back to reading file contents. The name of the extended
attribute that is used is configurable through a command line flag.

Using extended attributes to store this information also seems to be a
fairly common approach. Apparently it is also used within Google itself:

https://groups.google.com/g/bazel-discuss/c/6VmjSOLySnY/m/v2dpwt8jBgAJ

So far no code has been added to let Bazel write these attributes to
disk. The main goal for now is to speed up access to read-only corpora
whose maintainers have already spent the effort to add these attributes.
  • Loading branch information
EdSchouten committed Jul 27, 2020
1 parent e6cce76 commit 1034111
Show file tree
Hide file tree
Showing 23 changed files with 187 additions and 19 deletions.
3 changes: 3 additions & 0 deletions src/main/cpp/blaze.cc
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,9 @@ static vector<string> GetServerExeArgs(const blaze_util::Path &jvm_path,
// being "null" to set the programmatic default in the server.
result.push_back("--digest_function=" + startup_options.digest_function);
}
if (!startup_options.unix_digest_hash_attribute_name.empty()) {
result.push_back("--unix_digest_hash_attribute_name=" + startup_options.unix_digest_hash_attribute_name);
}
if (startup_options.idle_server_tasks) {
result.push_back("--idle_server_tasks");
} else {
Expand Down
5 changes: 5 additions & 0 deletions src/main/cpp/startup_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ StartupOptions::StartupOptions(const string &product_name,
RegisterUnaryStartupFlag("command_port");
RegisterUnaryStartupFlag("connect_timeout_secs");
RegisterUnaryStartupFlag("digest_function");
RegisterUnaryStartupFlag("unix_digest_hash_attribute_name");
RegisterUnaryStartupFlag("server_javabase");
RegisterUnaryStartupFlag("host_jvm_args");
RegisterUnaryStartupFlag("host_jvm_profile");
Expand Down Expand Up @@ -359,6 +360,10 @@ blaze_exit_code::ExitCode StartupOptions::ProcessArg(
NULL) {
digest_function = value;
option_sources["digest_function"] = rcfile;
} else if ((value = GetUnaryOption(arg, next_arg, "--unix_digest_hash_attribute_name")) !=
NULL) {
unix_digest_hash_attribute_name = value;
option_sources["unix_digest_hash_attribute_name"] = rcfile;
} else if ((value = GetUnaryOption(arg, next_arg, "--command_port")) !=
NULL) {
if (!blaze_util::safe_strto32(value, &command_port) ||
Expand Down
2 changes: 2 additions & 0 deletions src/main/cpp/startup_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ class StartupOptions {
// The hash function to use when computing file digests.
std::string digest_function;

// The name of the extended attribute that may hold a precomputed copy of a
// file's hash (value of --unix_digest_hash_attribute_name). Left empty when
// the flag is not given.
std::string unix_digest_hash_attribute_name;

bool idle_server_tasks;

// The startup options as received from the user and rc files, tagged with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ public ModuleFileSystem getFileSystem(
throws DefaultHashFunctionNotSetException {
BlazeServerStartupOptions options = startupOptions.getOptions(BlazeServerStartupOptions.class);
boolean enableSymLinks = options != null && options.enableWindowsSymlinks;
String unixDigestHashAttributeName = options != null ? options.unixDigestHashAttributeName : "";
if ("0".equals(System.getProperty("io.bazel.EnableJni"))) {
// Ignore UnixFileSystem, to be used for bootstrapping.
return ModuleFileSystem.create(
Expand All @@ -101,6 +102,6 @@ public ModuleFileSystem getFileSystem(
return ModuleFileSystem.create(
OS.getCurrent() == OS.WINDOWS
? new WindowsFileSystem(enableSymLinks)
: new UnixFileSystem());
: new UnixFileSystem(unixDigestHashAttributeName));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1109,7 +1109,7 @@ private static FileSystem defaultFileSystemImplementation(
// The JNI-based UnixFileSystem is faster, but on Windows it is not available.
return OS.getCurrent() == OS.WINDOWS
? new WindowsFileSystem(startupOptions.enableWindowsSymlinks)
: new UnixFileSystem();
: new UnixFileSystem(startupOptions.unixDigestHashAttributeName);
}

private static SubprocessFactory subprocessFactoryImplementation() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -482,4 +482,15 @@ public String getTypeDescription() {
+ "Requires Windows developer mode to be enabled and Windows 10 version 1703 or "
+ "greater.")
public boolean enableWindowsSymlinks;

// Startup option naming the extended attribute from which a precomputed file hash may be read.
// Defaults to the empty string.
@Option(
name = "unix_digest_hash_attribute_name",
defaultValue = "",
documentationCategory = OptionDocumentationCategory.UNDOCUMENTED,
effectTags = {OptionEffectTag.CHANGES_INPUTS, OptionEffectTag.LOSES_INCREMENTAL_STATE},
help =
"The name of an extended attribute that can be placed on files to store a precomputed "
+ "copy of the file's hash, corresponding with --digest_function. This option "
+ "can be used to reduce disk I/O and CPU load caused by hash computation.")
public String unixDigestHashAttributeName;
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,15 @@
@ThreadSafe
public class UnixFileSystem extends AbstractFileSystemWithCustomStat {

public UnixFileSystem() throws DefaultHashFunctionNotSetException {}
private final String hashAttributeName;

public UnixFileSystem(DigestHashFunction hashFunction) {
public UnixFileSystem(String hashAttributeName) throws DefaultHashFunctionNotSetException {
this.hashAttributeName = hashAttributeName;
}

public UnixFileSystem(DigestHashFunction hashFunction, String hashAttributeName) {
super(hashFunction);
this.hashAttributeName = hashAttributeName;
}

/**
Expand Down Expand Up @@ -407,6 +412,13 @@ public byte[] getxattr(Path path, String name, boolean followSymlinks) throws IO
}
}

/**
 * Looks up a precomputed digest for {@code path} in the configured extended attribute.
 *
 * @param path the file whose digest is requested
 * @return {@code null} when no attribute name is configured; otherwise whatever
 *     {@code getxattr} yields for the configured attribute, with symlinks followed
 * @throws IOException if the {@code getxattr} call fails
 */
@Override
protected byte[] getFastDigest(Path path) throws IOException {
  // An empty attribute name means the lookup is disabled; the extended attribute is not read.
  if (hashAttributeName.isEmpty()) {
    return null;
  }
  // Reading the stored value avoids recomputing the checksum from the file's contents.
  return getxattr(path, hashAttributeName, true);
}

@Override
protected byte[] getDigest(Path path) throws IOException {
String name = path.toString();
Expand Down
2 changes: 2 additions & 0 deletions src/test/java/com/google/devtools/build/lib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -195,12 +195,14 @@ java_test(
test_class = "com.google.devtools.build.lib.AllTests",
deps = [
":AllTests",
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/unix",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//src/test/java/com/google/devtools/build/lib/vfs:testutil",
"//src/test/java/com/google/devtools/build/lib/events:testutil",
"//src/test/java/com/google/devtools/build/lib/testutil",
"//src/test/java/com/google/devtools/build/lib/testutil:JunitUtils",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ DecompressorDescriptor.Builder createDescriptorBuilder() throws IOException {
FileSystem testFS =
OS.getCurrent() == OS.WINDOWS
? new JavaIoFileSystem(DigestHashFunction.getDefaultUnchecked())
: new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
: new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");

// do not rely on TestConstants.JAVATESTS_ROOT end with slash, but ensure separators
// are not duplicated
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public class LastBuildEventTest {

@Test
public void testForwardsReferencedLocalFilesCall() {
FileSystem fs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
FileSystem fs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
LocalFile localFile = new LocalFile(fs.getPath("/some/file"), LocalFileType.FAILED_TEST_OUTPUT);
LastBuildEvent event = new LastBuildEvent(new BuildEvent() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ protected boolean realFileSystem() {

@Override
protected FileSystem createFileSystem() {
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked()) {
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "") {
boolean threwException = false;

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ protected void setListener(FileListener listener) {
@Override
protected FileSystem createFileSystem() {
setListener(DUMMY_LISTENER);
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked()) {
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "") {
@Override
protected void chmod(Path path, int chmod) throws IOException {
listener.get().handle(PathOp.CHMOD, path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ protected boolean realFileSystem() {

@Override
protected FileSystem createFileSystem() {
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked()) {
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "") {

private void recordAccess(PathOp op, Path path) {
if (receiver != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,7 @@ public void hasExecutionStatistics_whenOptionIsEnabled() throws Exception {
// TODO(b/62588075) Currently no process-wrapper or execution statistics support in Windows.
assumeTrue(OS.getCurrent() != OS.WINDOWS);

FileSystem fs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
FileSystem fs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");

LocalExecutionOptions options = Options.getDefaults(LocalExecutionOptions.class);
options.collectLocalExecutionStatistics = true;
Expand Down Expand Up @@ -956,7 +956,7 @@ public void hasNoExecutionStatistics_whenOptionIsDisabled() throws Exception {
// TODO(b/62588075) Currently no process-wrapper or execution statistics support in Windows.
assumeTrue(OS.getCurrent() != OS.WINDOWS);

FileSystem fs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
FileSystem fs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");

LocalExecutionOptions options = Options.getDefaults(LocalExecutionOptions.class);
options.collectLocalExecutionStatistics = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public final class CommandUsingLinuxSandboxTest {

@Before
public final void createFileSystem() throws Exception {
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
runfilesDir = testFS.getPath(BlazeTestUtils.runfilesDir());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public final class CommandUsingProcessWrapperTest {

@Before
public final void createFileSystem() throws Exception {
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
}

private ProcessWrapper getProcessWrapper() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public class NativePosixFilesTest {

@Before
public final void createFileSystem() throws Exception {
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
workingDir = testFS.getPath(new File(TestUtils.tmpDir()).getCanonicalPath());
testFile = workingDir.getRelative("test");
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2020 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.unix;

import static com.google.common.truth.Truth.assertThat;

import com.google.devtools.build.lib.actions.cache.DigestUtils;
import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.FileSystem;
import com.google.devtools.build.lib.vfs.FileSystemTest;
import com.google.devtools.build.lib.vfs.Path;
import java.io.IOException;
import org.junit.Test;

/**
 * Tests that {@link UnixFileSystem#getFastDigest(Path)} serves digests from the configured
 * extended attribute instead of reading the file's contents.
 */
public class UnixDigestHashAttributeNameTest extends FileSystemTest {
  /** Extended attribute name the file system under test is configured with. */
  private static final String HASH_ATTRIBUTE_NAME = "user.checksum.sha256";

  /** Arbitrary 32-byte value returned by the fake {@code getxattr} implementation. */
  private static final byte[] FAKE_DIGEST = {
    0x18, 0x5f, 0x3d, 0x33, 0x22, 0x71, 0x7e, 0x25,
    0x55, 0x61, 0x26, 0x0c, 0x03, 0x6b, 0x2e, 0x26,
    0x43, 0x06, 0x7c, 0x30, 0x4e, 0x3a, 0x51, 0x20,
    0x07, 0x71, 0x76, 0x48, 0x26, 0x38, 0x19, 0x69,
  };

  @Override
  protected FileSystem getFreshFileSystem(DigestHashFunction digestHashFunction) {
    return new FakeAttributeFileSystem(digestHashFunction);
  }

  @Test
  public void testDigestIsReadFromExtendedAttribute() throws Exception {
    // Instead of actually trying to access this file, a call to getxattr() should be made. We
    // intercept this call and return a fake extended attribute value, thereby causing the checksum
    // computation to be skipped entirely.
    assertThat(DigestUtils.getDigestWithManualFallback(absolutize("myfile"), 123))
        .isEqualTo(FAKE_DIGEST);
  }

  /**
   * A {@link UnixFileSystem} whose {@code getxattr} checks its arguments and always answers with
   * {@link #FAKE_DIGEST}. It is an inner (non-static) class because it calls
   * {@code absolutize} on the enclosing test instance.
   */
  private class FakeAttributeFileSystem extends UnixFileSystem {
    public FakeAttributeFileSystem(DigestHashFunction hashFunction) {
      super(hashFunction, HASH_ATTRIBUTE_NAME);
    }

    @Override
    public byte[] getxattr(Path path, String name, boolean followSymlinks) throws IOException {
      assertThat(path).isEqualTo(absolutize("myfile"));
      assertThat(name).isEqualTo(HASH_ATTRIBUTE_NAME);
      assertThat(followSymlinks).isTrue();
      return FAKE_DIGEST;
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class UnixFileSystemTest extends SymlinkAwareFileSystemTest {

@Override
protected FileSystem getFreshFileSystem(DigestHashFunction digestHashFunction) {
return new UnixFileSystem(digestHashFunction);
return new UnixFileSystem(digestHashFunction, "");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ public class UnixPathEqualityTest {

@Before
public final void initializeFileSystem() throws Exception {
unixFs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
otherUnixFs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
unixFs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
otherUnixFs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
assertThat(unixFs != otherUnixFs).isTrue();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ public static FileSystem getNativeFileSystem() {
try {
return Class.forName(TestConstants.TEST_REAL_UNIX_FILE_SYSTEM)
.asSubclass(FileSystem.class)
.getDeclaredConstructor(DigestHashFunction.class)
.newInstance(DigestHashFunction.getDefaultUnchecked());
.getDeclaredConstructor(DigestHashFunction.class, String.class)
.newInstance(DigestHashFunction.getDefaultUnchecked(), "");
} catch (Exception e) {
throw new IllegalStateException(e);
}
Expand Down
7 changes: 7 additions & 0 deletions src/test/shell/bazel/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1282,3 +1282,10 @@ sh_test(
data = [":test-deps"],
deps = ["@bazel_tools//tools/bash/runfiles"],
)

# Exercises --unix_digest_hash_attribute_name, which only takes effect through
# UnixFileSystem; Windows builds use WindowsFileSystem instead, so the test is
# excluded there.
sh_test(
    name = "unix_digest_hash_attribute_name_test",
    srcs = ["unix_digest_hash_attribute_name_test.sh"],
    data = [":test-deps"],
    tags = ["no_windows"],
    deps = ["@bazel_tools//tools/bash/runfiles"],
)
63 changes: 63 additions & 0 deletions src/test/shell/bazel/unix_digest_hash_attribute_name_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/bin/bash
#
# Copyright 2020 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# --- begin runfiles.bash initialization ---
# Copy-pasted from Bazel's Bash runfiles library (tools/bash/runfiles/runfiles.bash).
# Purpose: locate and source runfiles.bash, using either a runfiles directory or a
# runfiles manifest; abort with an error if neither yields the library.
set -euo pipefail
# If neither RUNFILES_DIR names an existing directory nor RUNFILES_MANIFEST_FILE an
# existing file, try to derive one of them from paths next to this script ($0).
if [[ ! -d "${RUNFILES_DIR:-/dev/null}" && ! -f "${RUNFILES_MANIFEST_FILE:-/dev/null}" ]]; then
if [[ -f "$0.runfiles_manifest" ]]; then
export RUNFILES_MANIFEST_FILE="$0.runfiles_manifest"
elif [[ -f "$0.runfiles/MANIFEST" ]]; then
export RUNFILES_MANIFEST_FILE="$0.runfiles/MANIFEST"
elif [[ -f "$0.runfiles/bazel_tools/tools/bash/runfiles/runfiles.bash" ]]; then
export RUNFILES_DIR="$0.runfiles"
fi
fi
# Prefer sourcing the library straight from the runfiles directory; otherwise look up
# its path in the manifest (first matching line, everything after the first space).
if [[ -f "${RUNFILES_DIR:-/dev/null}/bazel_tools/tools/bash/runfiles/runfiles.bash" ]]; then
source "${RUNFILES_DIR}/bazel_tools/tools/bash/runfiles/runfiles.bash"
elif [[ -f "${RUNFILES_MANIFEST_FILE:-/dev/null}" ]]; then
source "$(grep -m1 "^bazel_tools/tools/bash/runfiles/runfiles.bash " \
"$RUNFILES_MANIFEST_FILE" | cut -d ' ' -f 2-)"
else
# Neither lookup strategy worked: report on stderr and stop.
echo >&2 "ERROR: cannot find @bazel_tools//tools/bash/runfiles:runfiles.bash"
exit 1
fi
# --- end runfiles.bash initialization ---

# Source the shared integration test setup script, resolving its location with rlocation;
# print an error to stderr and exit with status 1 if sourcing fails.
# NOTE(review): presumably this is what provides `fail`, `run_suite` and `bazel` below — confirm.
source "$(rlocation "io_bazel/src/test/shell/integration_test_setup.sh")" \
|| { echo "integration_test_setup.sh not found!" >&2; exit 1; }

# Builds a trivial genrule with --unix_digest_hash_attribute_name set and full
# profiler data enabled, then checks that the written profile mentions a
# "VFS xattr" entry for the BUILD file.
function test_xattr_operations_in_profile_log {
  touch WORKSPACE
  cat > BUILD << 'EOF'
genrule(
name = "foo",
outs = ["foo.out"],
cmd = "touch $@",
)
EOF

  # The attribute name is a startup option, so it goes before the "build" command.
  if ! bazel --unix_digest_hash_attribute_name=user.checksum.sha256 \
      build --profile=profile_log --record_full_profiler_data //:foo; then
    fail "Build failed"
  fi

  # A matching profile line shows that an xattr lookup was recorded for BUILD.
  if ! grep -q "VFS xattr.*BUILD" profile_log; then
    fail "Bazel did not perform getxattr() calls"
  fi
}

run_suite "Integration tests for --unix_digest_hash_attribute_name"

0 comments on commit 1034111

Please sign in to comment.