Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Let measure-complexity output the worst performing verification tasks by resource count #5631

Merged
50 changes: 46 additions & 4 deletions Source/DafnyDriver/Commands/MeasureComplexityCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@
using System.CommandLine;
using System.Linq;
using System.Reactive.Subjects;
using System.Threading;
using System.Threading.Tasks;
using DafnyCore;
using DafnyDriver.Commands;
using Microsoft.Boogie;
using VC;

namespace Microsoft.Dafny;

static class MeasureComplexityCommand {
public static IEnumerable<Option> Options => new Option[] {
Iterations,
RandomSeed,
TopX,
VerifyCommand.FilterSymbol,
VerifyCommand.FilterPosition,
}.Concat(DafnyCommands.VerificationOptions).
Expand All @@ -24,12 +28,16 @@ static MeasureComplexityCommand() {

DooFile.RegisterNoChecksNeeded(Iterations, false);
DooFile.RegisterNoChecksNeeded(RandomSeed, false);
DooFile.RegisterNoChecksNeeded(TopX, false);
}

private static readonly Option<uint> TopX = new("--worst-amount", () => 10U,
$"Configures the amount of worst performing verification tasks that are reported.");

private static readonly Option<uint> RandomSeed = new("--random-seed", () => 0U,
$"Turn on randomization of the input that Dafny passes to the SMT solver and turn on randomization in the SMT solver itself. Certain Dafny proofs are complex in the sense that changes to the proof that preserve its meaning may cause its verification result to change. This option simulates meaning-preserving changes to the proofs without requiring the user to actually make those changes. The proof changes are renaming variables and reordering declarations in the SMT input passed to the solver, and setting solver options that have similar effects.");

private static readonly Option<uint> Iterations = new("--iterations", () => 10U,
private static readonly Option<uint> Iterations = new("--iterations", () => 1U,
$"Attempt to verify each proof n times with n random seeds, each seed derived from the previous one. {RandomSeed.Name} can be used to specify the first seed, which will otherwise be 0.") {
ArgumentHelpName = "n"
};
Expand Down Expand Up @@ -62,7 +70,7 @@ private static async Task<int> Execute(DafnyOptions options) {
// For error diagnostics, we should group duplicates and say how often they occur.
// Performance data of individual verification tasks (VCs) should be grouped by VcNum (the assertion batch).
VerifyCommand.ReportVerificationDiagnostics(compilation, verificationResults);
var summaryReported = VerifyCommand.ReportVerificationSummary(compilation, verificationResults);
var summaryReported = ReportResourceSummary(compilation, verificationResults);
var proofDependenciesReported = VerifyCommand.ReportProofDependencies(compilation, resolution, verificationResults);
var verificationResultsLogged = VerifyCommand.LogVerificationResults(compilation, resolution, verificationResults);

Expand All @@ -75,13 +83,47 @@ private static async Task<int> Execute(DafnyOptions options) {
return await compilation.GetAndReportExitCode();
}

public static async Task ReportResourceSummary(
CliCompilation cliCompilation,
IObservable<CanVerifyResult> verificationResults) {

PriorityQueue<VerificationTaskResult, int> worstPerformers = new();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perfect place to use a priority queue!


var totalResources = 0;
var worstAmount = cliCompilation.Options.Get(TopX);
verificationResults.Subscribe(result => {
foreach (var taskResult in result.Results) {
var runResult = taskResult.Result;
totalResources += runResult.ResourceCount;
worstPerformers.Enqueue(taskResult, runResult.ResourceCount);
if (worstPerformers.Count > worstAmount) {
worstPerformers.Dequeue();
}
}
});
await verificationResults.WaitForComplete();
var output = cliCompilation.Options.OutputWriter;
var decreasingWorst = new Stack<VerificationTaskResult>();
while (worstPerformers.Count > 0) {
decreasingWorst.Push(worstPerformers.Dequeue());
}

await output.WriteLineAsync($"The total consumed resources are {totalResources}");
await output.WriteLineAsync($"The most demanding {worstAmount} verification tasks consumed these resources:");
foreach (var performer in decreasingWorst) {
var location = ((IToken)performer.Task.Token).TokenToString(cliCompilation.Options);
await output.WriteLineAsync($"{location}: {performer.Result.ResourceCount}");
}
}

private static async Task RunVerificationIterations(DafnyOptions options, CliCompilation compilation,
IObserver<CanVerifyResult> verificationResultsObserver) {
int iterationSeed = (int)options.Get(RandomSeed);
var random = new Random(iterationSeed);
foreach (var iteration in Enumerable.Range(0, (int)options.Get(Iterations))) {
var iterations = (int)options.Get(Iterations);
foreach (var iteration in Enumerable.Range(0, iterations)) {
await options.OutputWriter.WriteLineAsync(
$"Starting verification of iteration {iteration} with seed {iterationSeed}");
$"Starting verification of iteration {iteration + 1}/{iterations} with seed {iterationSeed}");
try {
await foreach (var result in compilation.VerifyAllLazily(iterationSeed)) {
verificationResultsObserver.OnNext(result);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// RUN: %exits-with 4 %baredafny measure-complexity --use-basename-for-filename --isolate-assertions --worst-amount 100 "%s" > %t.raw
// RUN: %sed 's#\): \d+#): <redacted>#g' %t.raw > %t.raw2
// RUN: %sed 's#are \d+#are <redacted>#g' %t.raw2 > %t
// RUN: %diff "%s.expect" "%t"
method Foo() {
assert Ack(0,0) == 10;
assert Ack(1,3) == 10;
assert Ack(3,3) == 10;
assert Ack(3,4) == 10;
}

function Ack(m: nat, n: nat): nat
{
if m == 0 then
n + 1
else if n == 0 then
Ack(m - 1, 1)
else
Ack(m - 1, Ack(m, n - 1))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
Starting verification of iteration 1/1 with seed 0
measure-complexity.dfy(6,18): Error: assertion might not hold
The total consumed resources are <redacted>
The most demanding 100 verification tasks consumed these resources:
measure-complexity.dfy(9,18): <redacted>
measure-complexity.dfy(8,18): <redacted>
measure-complexity.dfy(9,15): <redacted>
measure-complexity.dfy(7,18): <redacted>
measure-complexity.dfy(19,4): <redacted>
measure-complexity.dfy(6,18): <redacted>
measure-complexity.dfy(7,15): <redacted>
measure-complexity.dfy(19,10): <redacted>
measure-complexity.dfy(7,13): <redacted>
measure-complexity.dfy(17,4): <redacted>
measure-complexity.dfy(17,15): <redacted>
measure-complexity.dfy(19,15): <redacted>
measure-complexity.dfy(19,24): <redacted>
measure-complexity.dfy(17,10): <redacted>
measure-complexity.dfy(15,6): <redacted>
measure-complexity.dfy(19,4): <redacted>
measure-complexity.dfy(19,4): <redacted>
measure-complexity.dfy(9,13): <redacted>
measure-complexity.dfy(17,4): <redacted>
measure-complexity.dfy(6,13): <redacted>
measure-complexity.dfy(8,15): <redacted>
measure-complexity.dfy(17,4): <redacted>
measure-complexity.dfy(8,13): <redacted>
measure-complexity.dfy(19,15): <redacted>
measure-complexity.dfy(6,15): <redacted>
measure-complexity.dfy(19,15): <redacted>
measure-complexity.dfy(12,9): <redacted>
measure-complexity.dfy(5,7): <redacted>
5 changes: 5 additions & 0 deletions docs/dev/news/5631.feat
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Let the command `measure-complexity` output which verification tasks performed the worst in terms of resource count. Output looks like:
...
Verification task on line 8 in file measure-complexity.dfy consumed 9984 resources
Verification task on line 7 in file measure-complexity.dfy consumed 9065 resources
...
Loading