From 27a5c74f4e716e0044a8407353244f0ed989e686 Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 29 Jun 2020 02:22:08 -0700 Subject: [PATCH] Populate SpawnMetrics with more metrics in WorkerSpawnRunner The extra metrics that we'll collect in `SpawnMetrics` (on top of total time that we already collect): * Setup time (e.g., prefetching inputs, setting up sandbox, etc.) * Queue time (how long it took to acquire a worker/resources) * Execution time of the worker (time between sending a request and receiving a response) * Time to process outputs * Number of inputs RELNOTES: Collect more performance metrics for worker execution. PiperOrigin-RevId: 318774173 --- .../build/lib/worker/WorkerSpawnRunner.java | 35 ++++++++++++++----- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java index bdb9b1d76c1b15..2f767015607306 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java @@ -18,6 +18,7 @@ import com.google.common.base.MoreObjects; import com.google.common.base.Preconditions; +import com.google.common.base.Stopwatch; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Multimap; @@ -158,6 +159,8 @@ private SpawnResult actuallyExec(Spawn spawn, SpawnExecutionContext context) Code.NO_TOOLS); } + Instant startTime = Instant.now(); + runfilesTreeUpdater.updateRunfilesDirectory( execRoot, spawn.getRunfilesSupplier(), @@ -201,14 +204,18 @@ private SpawnResult actuallyExec(Spawn spawn, SpawnExecutionContext context) context.speculating(), Spawns.supportsMultiplexWorkers(spawn)); - Instant startTime = Instant.now(); + SpawnMetrics.Builder spawnMetrics = + SpawnMetrics.Builder.forWorkerExec() + .setInputFiles(inputFiles.getFiles().size() + inputFiles.getSymlinks().size()); WorkResponse response = - execInWorker(spawn, key, context, inputFiles, outputs, flagFiles, inputFileCache); - Duration wallTime = Duration.between(startTime, Instant.now()); + execInWorker( + spawn, key, context, inputFiles, outputs, flagFiles, inputFileCache, spawnMetrics); FileOutErr outErr = context.getFileOutErr(); response.getOutputBytes().writeTo(outErr.getErrorStream()); + Duration wallTime = Duration.between(startTime, Instant.now()); + int exitCode = response.getExitCode(); SpawnResult.Builder builder = new SpawnResult.Builder() @@ -216,11 +223,7 @@ private SpawnResult actuallyExec(Spawn spawn, SpawnExecutionContext context) .setExitCode(exitCode) .setStatus(exitCode == 0 ? Status.SUCCESS : Status.NON_ZERO_EXIT) .setWallTime(wallTime) - .setSpawnMetrics( - SpawnMetrics.Builder.forWorkerExec() - .setTotalTime(wallTime) - .setExecutionWallTime(wallTime) - .build()); + .setSpawnMetrics(spawnMetrics.setTotalTime(wallTime).build()); if (exitCode != 0) { builder.setFailureDetail( FailureDetail.newBuilder() @@ -337,7 +340,8 @@ private WorkResponse execInWorker( SandboxInputs inputFiles, SandboxOutputs outputs, List flagFiles, - MetadataProvider inputFileCache) + MetadataProvider inputFileCache, + SpawnMetrics.Builder spawnMetrics) throws InterruptedException, ExecException { Worker worker = null; WorkResponse response; @@ -345,6 +349,7 @@ private WorkResponse execInWorker( ActionExecutionMetadata owner = spawn.getResourceOwner(); try { + Stopwatch setupInputsStopwatch = Stopwatch.createStarted(); try { inputFiles.materializeVirtualInputs(execRoot); } catch (IOException e) { @@ -358,7 +363,9 @@ private WorkResponse execInWorker( String message = "IOException while prefetching for worker:"; throw createUserExecException(e, message, Code.PREFETCH_FAILURE); } + Duration setupInputsTime = setupInputsStopwatch.elapsed(); + Stopwatch queueStopwatch = Stopwatch.createStarted(); try { worker = workers.borrowObject(key); request = @@ -370,9 +377,15 @@ private WorkResponse execInWorker( try (ResourceHandle handle = resourceManager.acquireResources(owner, spawn.getLocalResources())) { + // We acquired a worker and resources -- mark that as queuing time. + spawnMetrics.setQueueTime(queueStopwatch.elapsed()); + context.report(ProgressStatus.EXECUTING, WorkerKey.makeWorkerTypeName(key.getProxied())); try { + // We consider `prepareExecution` to be also part of setup. + Stopwatch prepareExecutionStopwatch = Stopwatch.createStarted(); worker.prepareExecution(inputFiles, outputs, key.getWorkerFilesWithHashes().keySet()); + spawnMetrics.setSetupTime(setupInputsTime.plus(prepareExecutionStopwatch.elapsed())); } catch (IOException e) { String message = ErrorMessage.builder() @@ -384,6 +397,7 @@ private WorkResponse execInWorker( throw createUserExecException(message, Code.PREPARE_FAILURE); } + Stopwatch executionStopwatch = Stopwatch.createStarted(); try { worker.putRequest(request); } catch (IOException e) { @@ -419,6 +433,7 @@ private WorkResponse execInWorker( .toString(); throw createUserExecException(message, Code.PARSE_RESPONSE_FAILURE); } + spawnMetrics.setExecutionWallTime(executionStopwatch.elapsed()); } if (response == null) { @@ -433,8 +448,10 @@ private WorkResponse execInWorker( } try { + Stopwatch processOutputsStopwatch = Stopwatch.createStarted(); context.lockOutputFiles(); worker.finishExecution(execRoot); + spawnMetrics.setProcessOutputsTime(processOutputsStopwatch.elapsed()); } catch (IOException e) { String message = ErrorMessage.builder()