Artifacts should only be logged in the SARIF if we have results #2431

Closed · wants to merge 6 commits
8 changes: 4 additions & 4 deletions src/Sarif.Driver/Sdk/MultithreadedAnalyzeCommandBase.cs
@@ -307,6 +307,10 @@ private void LogCachingLogger(TContext rootContext, IAnalysisLogger logger, TCon

if (results?.Count > 0)
{
_run?.GetFileIndex(new ArtifactLocation { Uri = context.TargetUri },
dataToInsert: _dataToInsert,
hashData: _computeHashes ? context.Hashes : null);

foreach (KeyValuePair<ReportingDescriptor, IList<Result>> kv in results)
{
foreach (Result result in kv.Value)
@@ -485,10 +489,6 @@ private async Task<bool> HashAsync()
_hashToFilesMap[hashData.Sha256] = paths;
}

_run?.GetFileIndex(new ArtifactLocation { Uri = context.TargetUri },
dataToInsert: _dataToInsert,
hashData: hashData);
Review comment (Collaborator, Author):

Instead of adding artifacts for all files, we are moving this call to the point where we have an actual result.

The hash itself will already be in the context, so there is no need to calculate it again.
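
As a reading aid, here is a minimal self-contained sketch of that pattern (a hypothetical helper, not the PR's code; it relies on the public Run.GetFileIndex overload shown in the Run.cs hunk below and on the Microsoft.CodeAnalysis.Sarif types, and OptionallyEmittedData.Hashes stands in for whatever dataToInsert the caller configured):

using System;
using System.Collections.Generic;
using Microsoft.CodeAnalysis.Sarif;

internal static class LazyArtifactLoggingSketch
{
    // Registers an analysis target as an artifact only when at least one result
    // was produced for it, reusing a hash that was computed earlier instead of
    // hashing the file a second time.
    public static void LogTargetIfResults(Run run, Uri targetUri, IList<Result> results, HashData precomputedHash)
    {
        if (results == null || results.Count == 0) { return; }

        run.GetFileIndex(new ArtifactLocation { Uri = targetUri },
                         dataToInsert: OptionallyEmittedData.Hashes,
                         hashData: precomputedHash);
    }
}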


paths.Add(localPath);
context.Hashes = hashData;
}
11 changes: 5 additions & 6 deletions src/Sarif/Core/Run.cs
@@ -32,12 +32,11 @@ public Uri ExpandUrisWithUriBaseId(string key, string currentValue = null)
throw new InvalidOperationException("Author this code along with tests for originalUriBaseIds that are nested");
}

public int GetFileIndex(
ArtifactLocation fileLocation,
bool addToFilesTableIfNotPresent = true,
OptionallyEmittedData dataToInsert = OptionallyEmittedData.None,
Encoding encoding = null,
HashData hashData = null)
public int GetFileIndex(ArtifactLocation fileLocation,
bool addToFilesTableIfNotPresent = true,
OptionallyEmittedData dataToInsert = OptionallyEmittedData.None,
Encoding encoding = null,
HashData hashData = null)
{
if (fileLocation == null) { throw new ArgumentNullException(nameof(fileLocation)); }

56 changes: 9 additions & 47 deletions src/Sarif/Writers/SarifLogger.cs
@@ -19,7 +19,6 @@ public class SarifLogger : BaseLogger, IDisposable, IAnalysisLogger
{
private readonly Run _run;
private readonly TextWriter _textWriter;
private readonly bool _persistArtifacts;
private readonly bool _closeWriterOnDispose;
private readonly LogFilePersistenceOptions _logFilePersistenceOptions;
private readonly JsonTextWriter _jsonTextWriter;
@@ -98,8 +97,7 @@ public SarifLogger(
dataToRemove,
invocationTokensToRedact,
invocationPropertiesToLog,
defaultFileEncoding,
AnalysisTargetToHashDataMap);
defaultFileEncoding);

tool = tool ?? Tool.CreateFromAssemblyData();

@@ -116,11 +114,6 @@
RuleToIndexMap[_run.Tool.Driver.Rules[i]] = i;
}
}

_persistArtifacts =
(_dataToInsert & OptionallyEmittedData.Hashes) != 0 ||
(_dataToInsert & OptionallyEmittedData.TextFiles) != 0 ||
(_dataToInsert & OptionallyEmittedData.BinaryFiles) != 0;
}

private SarifLogger(
@@ -155,52 +148,17 @@ private void EnhanceRun(
OptionallyEmittedData dataToRemove,
IEnumerable<string> invocationTokensToRedact,
IEnumerable<string> invocationPropertiesToLog,
string defaultFileEncoding = null,
IDictionary<string, HashData> filePathToHashDataMap = null)
string defaultFileEncoding = null)
{
_run.Invocations ??= new List<Invocation>();
if (defaultFileEncoding != null)
{
_run.DefaultEncoding = defaultFileEncoding;
}

Encoding encoding = SarifUtilities.GetEncodingFromName(_run.DefaultEncoding);

if (analysisTargets != null)
{
_run.Artifacts ??= new List<Artifact>();

foreach (string target in analysisTargets)
{
Uri uri = new Uri(UriHelper.MakeValidUri(target), UriKind.RelativeOrAbsolute);

HashData hashData = null;
if (dataToInsert.HasFlag(OptionallyEmittedData.Hashes))
{
filePathToHashDataMap?.TryGetValue(target, out hashData);
}

var artifact = Artifact.Create(
new Uri(target, UriKind.RelativeOrAbsolute),
dataToInsert,
encoding,
hashData: hashData);

var fileLocation = new ArtifactLocation
{
Uri = uri
};

artifact.Location = fileLocation;

// This call will insert the file object into run.Files if not already present
artifact.Location.Index = _run.GetFileIndex(
artifact.Location,
addToFilesTableIfNotPresent: true,
dataToInsert: dataToInsert,
encoding: encoding,
hashData: hashData);
}
Review comment (Collaborator, Author):

This code path is used by the single-threaded analysis. When hash insertion is enabled, it would generate hashes for every analysis target and store an artifact for each one in the SARIF log.

The issue is that if we analyze 1k files and do not produce any results, we would still generate artifacts for all of them.

As an optimization, we will only store the artifact if we have a result.
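
To make the intent concrete, here is a small sketch of the new policy (hypothetical helper and names, not the PR's code): the number of artifacts emitted equals the number of analyzed targets that produced at least one result, so 1k clean files contribute nothing to the log.

using System.Collections.Generic;
using System.Linq;
using Microsoft.CodeAnalysis.Sarif;

internal static class ArtifactPolicySketch
{
    // Counts how many analysis targets would end up in run.Artifacts under the
    // new behavior: only targets with at least one logged result are included.
    public static int CountArtifactsToEmit(
        IEnumerable<string> analysisTargets,
        IDictionary<string, IList<Result>> resultsByTarget)
    {
        return analysisTargets.Count(
            target => resultsByTarget.TryGetValue(target, out IList<Result> results) &&
                      results?.Count > 0);
    }
}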

Review comment (Collaborator, Author):

I reviewed all of the versions we have released and saw that every version after 1.4.2 emits the artifacts if the list of analysisTargets is not empty.

Review comment (eddynaka, Collaborator, Author — Jan 28, 2022):

Looking at the other variables, it looks like we tried to improve this behavior by adding _persistArtifacts, but that check occurs too late (after we have already added the artifact to the run).

}

var invocation = Invocation.Create(
@@ -447,12 +405,16 @@ private void CaptureArtifact(ArtifactLocation fileLocation)
catch (ArgumentException) { } // Unrecognized encoding name
}

HashData hashData = null;
AnalysisTargetToHashDataMap?.TryGetValue(fileLocation.Uri.OriginalString, out hashData);
Review comment (Collaborator, Author):

AnalysisTargetToHashDataMap?.TryGetValue(fileLocation.Uri.OriginalString, out hashData);

For the single-threaded analysis, AnalysisTargetToHashDataMap will already contain the hashes; here we simply reuse the information we already have and store it in the artifacts.

The CaptureArtifact method is only used when we are logging the artifacts of a result.

Review comment (Collaborator, Author):

If hash insertion isn't enabled, AnalysisTargetToHashDataMap will be null, so we use the null-conditional check to prevent a null reference exception.
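
Putting the two comments together, here is a compact sketch (hypothetical helper that mirrors the hunk below; OptionallyEmittedData.Hashes stands in for the logger's configured _dataToInsert) of how the cached hash is reused and why the lookup is safe when hashing is disabled:

using System.Collections.Generic;
using Microsoft.CodeAnalysis.Sarif;

internal static class CaptureArtifactSketch
{
    // Looks up the hash recorded while the target was analyzed (keyed by the
    // location's original URI string) and ensures the artifact is present in
    // run.Artifacts. When hashing is disabled the map is null; the '?.' operator
    // short-circuits, TryGetValue is never invoked, and hashData stays null.
    public static int Capture(
        Run run,
        ArtifactLocation fileLocation,
        IDictionary<string, HashData> analysisTargetToHashDataMap)
    {
        HashData hashData = null;
        analysisTargetToHashDataMap?.TryGetValue(fileLocation.Uri.OriginalString, out hashData);

        return run.GetFileIndex(fileLocation,
                                addToFilesTableIfNotPresent: true,
                                dataToInsert: OptionallyEmittedData.Hashes,
                                encoding: null,
                                hashData: hashData);
    }
}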


// Ensure Artifact is in Run.Artifacts and ArtifactLocation.Index is set to point to it
int index = _run.GetFileIndex(
fileLocation,
addToFilesTableIfNotPresent: _persistArtifacts,
_dataToInsert,
encoding);
addToFilesTableIfNotPresent: true,
dataToInsert: _dataToInsert,
encoding,
hashData);

// Remove redundant Uri and UriBaseId once index has been set
if (index > -1 && this.Optimize)
132 changes: 74 additions & 58 deletions src/Test.UnitTests.Sarif.Driver/Sdk/AnalyzeCommandBaseTests.cs
@@ -1258,21 +1258,81 @@ private void AnalyzeScenarios(int[] scenarios)
}

[Fact]
public void AnalyzeCommandBase_MultithreadedShouldUseCacheIfFilesAreTheSame()
public void AnalyzeCommandBase_Multithreaded_ShouldOnlyLogArtifactsWhenResultsAreFound()
{
// Generating 20 files with different names but same content.
RunMultithreadedAnalyzeCommand(ComprehensiveKindAndLevelsByFilePath,
generateSameOutput: true,
expectedResultCode: 0,
expectedResultCount: 20,
expectedCacheSize: 1);

// Generating 20 files with different names and content.
RunMultithreadedAnalyzeCommand(ComprehensiveKindAndLevelsByFilePath,
generateSameOutput: false,
expectedResultCode: 0,
expectedResultCount: 7,
expectedCacheSize: 0);
const int expectedNumberOfArtifacts = 2;
const int expectedNumberOfResultsWithErrors = 1;
const int expectedNumberOfResultsWithWarnings = 1;
var files = new List<string>
{
$@"{Environment.CurrentDirectory}\Error.dll",
$@"{Environment.CurrentDirectory}\Warning.dll",
$@"{Environment.CurrentDirectory}\Note.dll",
$@"{Environment.CurrentDirectory}\Pass.dll",
$@"{Environment.CurrentDirectory}\NotApplicable.exe",
$@"{Environment.CurrentDirectory}\Informational.sys",
$@"{Environment.CurrentDirectory}\Open.cab",
$@"{Environment.CurrentDirectory}\Review.dll",
$@"{Environment.CurrentDirectory}\NoIssues.dll",
};

var testCase = new ResultsCachingTestCase
{
Files = files,
PersistLogFileToDisk = true,
FileSystem = CreateDefaultFileSystemForResultsCaching(files, generateSameInput: false)
};

var options = new TestAnalyzeOptions
{
TestRuleBehaviors = testCase.TestRuleBehaviors,
OutputFilePath = testCase.PersistLogFileToDisk ? Guid.NewGuid().ToString() : null,
TargetFileSpecifiers = new string[] { Guid.NewGuid().ToString() },
Kind = new List<ResultKind> { ResultKind.Fail },
Level = new List<FailureLevel> { FailureLevel.Warning, FailureLevel.Error },
DataToInsert = new OptionallyEmittedData[] { OptionallyEmittedData.Hashes },
};

Run run = RunAnalyzeCommand(options, testCase, multithreaded: true);

// Hashes is enabled and we should expect to see two artifacts because we have:
// one result with Error level and one result with Warning level.
run.Artifacts.Should().HaveCount(expectedNumberOfArtifacts);
run.Results.Count(r => r.Level == FailureLevel.Error).Should().Be(expectedNumberOfResultsWithErrors);
run.Results.Count(r => r.Level == FailureLevel.Warning).Should().Be(expectedNumberOfResultsWithWarnings);
}

[Fact]
public void AnalyzeCommandBase_SingleThreaded_ShouldOnlyLogArtifactsWhenResultsAreFound()
{
const int expectedNumberOfResultsWithErrors = 5;
const int expectedNumberOfResultsWithWarnings = 2;
int totalNumber = expectedNumberOfResultsWithErrors + expectedNumberOfResultsWithWarnings;

var testCase = new ResultsCachingTestCase
{
Files = ComprehensiveKindAndLevelsByFileName,
PersistLogFileToDisk = true,
FileSystem = null
};

var options = new TestAnalyzeOptions
{
TestRuleBehaviors = testCase.TestRuleBehaviors,
OutputFilePath = testCase.PersistLogFileToDisk ? Guid.NewGuid().ToString() : null,
TargetFileSpecifiers = new string[] { Guid.NewGuid().ToString() },
Kind = new List<ResultKind> { ResultKind.Fail },
Level = new List<FailureLevel> { FailureLevel.Warning, FailureLevel.Error },
DataToInsert = new OptionallyEmittedData[] { OptionallyEmittedData.Hashes },
};

Run runSingleThread = RunAnalyzeCommand(options, testCase, multithreaded: false);

// Hashes is enabled and we should expect to see seven artifacts because we have seven distinct results
// of which five are errors and two are warnings.
runSingleThread.Artifacts.Should().NotBeEmpty();
runSingleThread.Results.Should().HaveCount(totalNumber);
runSingleThread.Artifacts.Should().HaveCount(totalNumber);
}

private static readonly IList<string> ComprehensiveKindAndLevelsByFileName = new List<string>
@@ -1513,50 +1573,6 @@ private static SarifLog RunAnalyzeCommand(TestAnalyzeOptions options,
: JsonConvert.DeserializeObject<SarifLog>(File.ReadAllText(options.OutputFilePath));
}

private static void RunMultithreadedAnalyzeCommand(IList<string> files,
bool generateSameOutput,
int expectedResultCode,
int expectedResultCount,
int expectedCacheSize)
{
var testCase = new ResultsCachingTestCase
{
Files = files,
PersistLogFileToDisk = true,
FileSystem = CreateDefaultFileSystemForResultsCaching(files, generateSameOutput)
};

var options = new TestAnalyzeOptions
{
OutputFilePath = Guid.NewGuid().ToString(),
TargetFileSpecifiers = new string[] { Guid.NewGuid().ToString() },
Kind = new List<ResultKind> { ResultKind.Fail },
Level = new List<FailureLevel> { FailureLevel.Warning, FailureLevel.Error },
DataToInsert = new OptionallyEmittedData[] { OptionallyEmittedData.Hashes }
};

try
{
TestRule.s_testRuleBehaviors = testCase.TestRuleBehaviors.AccessibleOutsideOfContextOnly();

var command = new TestMultithreadedAnalyzeCommand(testCase.FileSystem)
{
DefaultPluginAssemblies = new Assembly[] { typeof(AnalyzeCommandBaseTests).Assembly }
};

HashUtilities.FileSystem = testCase.FileSystem;
command.Run(options).Should().Be(expectedResultCode);

SarifLog sarifLog = JsonConvert.DeserializeObject<SarifLog>(File.ReadAllText(options.OutputFilePath));
sarifLog.Runs[0].Results.Count.Should().Be(expectedResultCount);
command._analysisLoggerCache.Count.Should().Be(expectedCacheSize);
}
finally
{
TestRule.s_testRuleBehaviors = TestRuleBehaviors.None;
}
}

private static SarifLog ConvertConsoleOutputToSarifLog(string consoleOutput)
{
var sb = new StringBuilder();