Skip to content

Commit

Permalink
Implement combined disk and HTTP cache
Browse files Browse the repository at this point in the history
This PR adds the ability to enable disk and HTTP cache simultaneously. Specifically, if you set
```
build --remote_http_cache=http://some.remote.cache
build --disk_cache=/some/disk/cache
```
Then Bazel will look for cached items in the disk cache first. If an item is not found in the disk cache, it will be looked up in the HTTP cache. If it is found there, it will be copied into the disk cache. On put, Bazel will store items in both the disk and the HTTP cache.

We tested this change on a relatively large internal project and found that a fully cached, clean build was 5-10 times faster with the mixed cache (i.e. reading from disk cache) than with a pure remote cache, depending on the remote cache location.

cc @dajmaki @francesco-da

Closes #7512.

PiperOrigin-RevId: 237011131
  • Loading branch information
aherrmann authored and copybara-github committed Mar 6, 2019
1 parent 72f007d commit 76370d5
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,6 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException {

boolean enableRestCache = SimpleBlobStoreFactory.isRestUrlOptions(remoteOptions);
boolean enableDiskCache = SimpleBlobStoreFactory.isDiskCache(remoteOptions);
if (enableRestCache && enableDiskCache) {
throw new AbruptExitException(
"Cannot enable HTTP-based and local disk cache simultaneously",
ExitCode.COMMAND_LINE_ERROR);
}
boolean enableBlobStoreCache = enableRestCache || enableDiskCache;
boolean enableGrpcCache = GrpcRemoteCache.isRemoteCacheOptions(remoteOptions);
boolean enableRemoteExecution = shouldEnableRemoteExecution(remoteOptions);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import static com.google.common.base.Preconditions.checkNotNull;

import com.google.auth.Credentials;
import com.google.devtools.build.lib.remote.blobstore.CombinedDiskHttpBlobStore;
import com.google.devtools.build.lib.remote.blobstore.OnDiskBlobStore;
import com.google.devtools.build.lib.remote.blobstore.SimpleBlobStore;
import com.google.devtools.build.lib.remote.blobstore.http.HttpBlobStore;
Expand Down Expand Up @@ -68,9 +69,23 @@ public static SimpleBlobStore createDiskCache(Path workingDirectory, PathFragmen
return new OnDiskBlobStore(cacheDir);
}

public static SimpleBlobStore createCombinedCache(
Path workingDirectory, PathFragment diskCachePath, RemoteOptions options, Credentials cred)
throws IOException {
Path cacheDir = workingDirectory.getRelative(checkNotNull(diskCachePath));
if (!cacheDir.exists()) {
cacheDir.createDirectoryAndParents();
}
return new CombinedDiskHttpBlobStore(cacheDir, createRest(options, cred));
}

public static SimpleBlobStore create(
RemoteOptions options, @Nullable Credentials creds, @Nullable Path workingDirectory)
throws IOException {

if (isRestUrlOptions(options) && isDiskCache(options)) {
return createCombinedCache(workingDirectory, options.diskCache, options, creds);
}
if (isRestUrlOptions(options)) {
return createRest(options, creds);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// Copyright 2019 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.remote.blobstore;

import com.google.common.base.Preconditions;
import com.google.common.io.ByteStreams;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.SettableFuture;
import com.google.devtools.build.lib.vfs.Path;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* A {@link SimpleBlobStore} implementation combining two blob stores. A local disk blob store and a
* remote http blob store. If a blob isn't found in the first store, the second store is used, and
* the blob added to the first. Put puts the blob on both stores.
*/
public final class CombinedDiskHttpBlobStore extends OnDiskBlobStore {
private static final Logger logger = Logger.getLogger(CombinedDiskHttpBlobStore.class.getName());
private final SimpleBlobStore bsHttp;

public CombinedDiskHttpBlobStore(Path root, SimpleBlobStore bsHttp) {
super(root);
this.bsHttp = Preconditions.checkNotNull(bsHttp);
}

@Override
public boolean containsKey(String key) {
// HTTP cache does not support containsKey.
// Don't support it here either for predictable semantics.
throw new UnsupportedOperationException("HTTP Caching does not use this method.");
}

@Override
public ListenableFuture<Boolean> get(String key, OutputStream out) {
boolean foundOnDisk = super.containsKey(key);

if (foundOnDisk) {
return super.get(key, out);
} else {
// Write a temporary file first, and then rename, to avoid data corruption in case of a crash.
Path temp = toPath(UUID.randomUUID().toString());

OutputStream tempOut;
try {
tempOut = temp.getOutputStream();
} catch (IOException e) {
return Futures.immediateFailedFuture(e);
}
ListenableFuture<Boolean> chained =
Futures.transformAsync(
bsHttp.get(key, tempOut),
(found) -> {
if (!found) {
return Futures.immediateFuture(false);
} else {
Path target = toPath(key);
// The following note and line is taken from OnDiskBlobStore.java
// TODO(ulfjack): Fsync temp here before we rename it to avoid data loss in the
// case of machine
// crashes (the OS may reorder the writes and the rename).
temp.renameTo(target);

SettableFuture<Boolean> f = SettableFuture.create();
try (InputStream in = target.getInputStream()) {
ByteStreams.copy(in, out);
f.set(true);
} catch (IOException e) {
f.setException(e);
}
return f;
}
},
MoreExecutors.directExecutor());
chained.addListener(
() -> {
try {
tempOut.close();
} catch (IOException e) {
// not sure what to do here, we either are here because of another exception being
// thrown,
// or we have successfully used the file we are trying (and failing) to close
logger.log(Level.WARNING, "Failed to close temporary file on get", e);
}
},
MoreExecutors.directExecutor());
return chained;
}
}

@Override
public boolean getActionResult(String key, OutputStream out)
throws IOException, InterruptedException {
if (super.getActionResult(key, out)) {
return true;
}

try (ByteArrayOutputStream tmpOut = new ByteArrayOutputStream()) {
if (bsHttp.getActionResult(key, tmpOut)) {
byte[] tmp = tmpOut.toByteArray();
super.putActionResult(key, tmp);
ByteStreams.copy(new ByteArrayInputStream(tmp), out);
return true;
}
}

return false;
}

@Override
public void put(String key, long length, InputStream in)
throws IOException, InterruptedException {
super.put(key, length, in);
try (InputStream inFile = toPath(key).getInputStream()) {
bsHttp.put(key, length, inFile);
}
}

@Override
public void putActionResult(String key, byte[] in) throws IOException, InterruptedException {
super.putActionResult(key, in);
bsHttp.putActionResult(key, in);
}

@Override
public void close() {
super.close();
bsHttp.close();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.util.UUID;

/** A on-disk store for the remote action cache. */
public final class OnDiskBlobStore implements SimpleBlobStore {
public class OnDiskBlobStore implements SimpleBlobStore {
private final Path root;
static final String ACTION_KEY_PREFIX = "ac_";

Expand Down Expand Up @@ -63,7 +63,8 @@ public boolean getActionResult(String key, OutputStream out)
}

@Override
public void put(String key, long length, InputStream in) throws IOException {
public void put(String key, long length, InputStream in)
throws IOException, InterruptedException {
Path target = toPath(key);
if (target.exists()) {
return;
Expand All @@ -87,7 +88,7 @@ public void putActionResult(String key, byte[] in) throws IOException, Interrupt
@Override
public void close() {}

private Path toPath(String key) {
protected Path toPath(String key) {
return root.getChild(key);
}
}
65 changes: 65 additions & 0 deletions src/test/shell/bazel/remote/remote_execution_http_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -341,4 +341,69 @@ EOF
|| fail "Failed to build //a:gen1 without remote cache"
}

function test_genrule_combined_disk_http_cache() {
# Test for the combined disk and http cache.
# Built items should be pushed to both the disk and http cache.
# If an item is missing on disk cache, but present on http cache,
# then bazel should copy it from http cache to disk cache on fetch.

local cache="${TEST_TMPDIR}/cache"
local disk_flags="--disk_cache=$cache"
local http_flags="--remote_http_cache=http://localhost:${http_port}"

mkdir -p a
cat > a/BUILD <<EOF
package(default_visibility = ["//visibility:public"])
genrule(
name = 'test',
cmd = 'echo "Hello world" > \$@',
outs = [ 'test.txt' ],
)
EOF
rm -rf $cache
mkdir $cache

# Build and push to disk and http cache
bazel build $disk_flags $http_flags //a:test \
|| fail "Failed to build //a:test with combined disk http cache"
cp -f bazel-genfiles/a/test.txt ${TEST_TMPDIR}/test_expected

# Fetch from disk cache
bazel clean
bazel build $disk_flags //a:test &> $TEST_log \
|| fail "Failed to fetch //a:test from disk cache"
expect_log "1 remote cache hit"
diff bazel-genfiles/a/test.txt ${TEST_TMPDIR}/test_expected \
|| fail "Disk cache generated different result"

# Fetch from http cache
bazel clean
bazel build $http_flags //a:test &> $TEST_log \
|| fail "Failed to fetch //a:test from http cache"
expect_log "1 remote cache hit"
diff bazel-genfiles/a/test.txt ${TEST_TMPDIR}/test_expected \
|| fail "HTTP cache generated different result"

rm -rf $cache
mkdir $cache

# Copy from http cache to disk cache
bazel clean
bazel build $disk_flags $http_flags //a:test &> $TEST_log \
|| fail "Failed to copy //a:test from http cache to disk cache"
expect_log "1 remote cache hit"
diff bazel-genfiles/a/test.txt ${TEST_TMPDIR}/test_expected \
|| fail "HTTP cache generated different result"

# Fetch from disk cache
bazel clean
bazel build $disk_flags //a:test &> $TEST_log \
|| fail "Failed to fetch //a:test from disk cache"
expect_log "1 remote cache hit"
diff bazel-genfiles/a/test.txt ${TEST_TMPDIR}/test_expected \
|| fail "Disk cache generated different result"

rm -rf $cache
}

run_suite "Remote execution and remote cache tests"

0 comments on commit 76370d5

Please sign in to comment.