Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Zip work #2838

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 82 additions & 47 deletions src/Sarif.Driver/Sdk/MultithreadedAnalyzeCommandBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Diagnostics;
using System.Diagnostics.Tracing;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Net.Http;
Expand Down Expand Up @@ -648,72 +649,106 @@ private async Task<bool> EnumerateTargetsAsync(TContext globalContext)
return true;
}

private async Task<bool> EnumerateFilesFromArtifactsProvider(TContext globalContext)
private async Task<bool> EnumerateArtifact(IEnumeratedArtifact artifact, TContext globalContext)
{
foreach (IEnumeratedArtifact artifact in globalContext.TargetsProvider.Artifacts)
globalContext.CancellationToken.ThrowIfCancellationRequested();

string filePath = artifact.Uri.GetFilePath();

if (globalContext.CompiledGlobalFileDenyRegex?.Match(filePath).Success == true)
{
globalContext.CancellationToken.ThrowIfCancellationRequested();
_filesMatchingGlobalFileDenyRegex++;
DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.FilePathDenied, artifact.SizeInBytes.Value, globalContext.GlobalFilePathDenyRegex);

string filePath = artifact.Uri.GetFilePath();
string reason = $"its file path matched the global file deny regex: {globalContext.GlobalFilePathDenyRegex}";
Notes.LogFileSkipped(globalContext, filePath, reason);
return false;
}

if (globalContext.CompiledGlobalFileDenyRegex?.Match(filePath).Success == true)
{
_filesMatchingGlobalFileDenyRegex++;
DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.FilePathDenied, artifact.SizeInBytes.Value, globalContext.GlobalFilePathDenyRegex);
string extension = Path.GetExtension(filePath);
if (string.IsNullOrEmpty(artifact.Uri.Query) &&
extension.Equals(".zip", StringComparison.OrdinalIgnoreCase))
{
var context = new TContext();
context.Policy = globalContext.Policy;
context.Logger = globalContext.Logger;

string reason = $"its file path matched the global file deny regex: {globalContext.GlobalFilePathDenyRegex}";
Notes.LogFileSkipped(globalContext, filePath, reason);
continue;
}
var uri = new Uri(filePath, UriKind.RelativeOrAbsolute);
ZipArchive archive = null;

if (artifact.SizeInBytes == 0)
try
{
DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.EmptyFile, 00, data2: null);
Notes.LogEmptyFileSkipped(globalContext, filePath);
continue;
archive = ZipFile.OpenRead(filePath);
}

if (!IsTargetWithinFileSizeLimit(artifact.SizeInBytes.Value, globalContext.MaxFileSizeInKilobytes))
catch(InvalidDataException)
{
_filesExceedingSizeLimitCount++;
DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.FileExceedsSizeLimits, artifact.SizeInBytes.Value, $"{globalContext.MaxFileSizeInKilobytes}");
Notes.LogFileExceedingSizeLimitSkipped(globalContext, artifact.Uri.GetFilePath(), artifact.SizeInBytes.Value / 1000);
continue;
// TBD log exception
return false;
}

TContext fileContext = CreateScanTargetContext(globalContext);
var artifactProvider = new MultithreadedZipArchiveArtifactProvider(uri, archive, globalContext.FileSystem);
context.TargetsProvider = artifactProvider;

await EnumerateFilesFromArtifactsProvider(context);
return true;
}

fileContext.Logger =
new CachingLogger(globalContext.FailureLevels,
globalContext.ResultKinds);
filePath = $"{filePath}{artifact.Uri.Query}";

Debug.Assert(fileContext.Logger != null);
fileContext.CurrentTarget = artifact;
fileContext.CancellationToken = globalContext.CancellationToken;
if (artifact.SizeInBytes == 0)
{
DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.EmptyFile, 00, data2: null);
Notes.LogEmptyFileSkipped(globalContext, filePath);
return true;
}

lock (globalContext)
{
// We need to generate this event on the global logger, though as
// a result this event means 'target enumerated for analysis'
// rather than literally 'we are analyzing the target'.
//
// This call needs to be protected with a lock as the actual
// logging occurs on a separated thread.
globalContext.Logger.AnalyzingTarget(fileContext);
}
if (!IsTargetWithinFileSizeLimit(artifact.SizeInBytes.Value, globalContext.MaxFileSizeInKilobytes))
{
_filesExceedingSizeLimitCount++;
DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.FileExceedsSizeLimits, artifact.SizeInBytes.Value, $"{globalContext.MaxFileSizeInKilobytes}");
Notes.LogFileExceedingSizeLimitSkipped(globalContext, filePath, artifact.SizeInBytes.Value / 1000);
return false;
}

bool added = _fileContexts.TryAdd(_fileContextsCount, fileContext);
Debug.Assert(added);
TContext fileContext = CreateScanTargetContext(globalContext);

if (_fileContextsCount == 0)
{
DriverEventSource.Log.FirstArtifactQueued(fileContext.CurrentTarget.Uri.GetFilePath());
}
fileContext.Logger = new CachingLogger(globalContext.FailureLevels,
globalContext.ResultKinds);

Debug.Assert(fileContext.Logger != null);
fileContext.CurrentTarget = artifact;
fileContext.CancellationToken = globalContext.CancellationToken;

await readyToScanChannel.Writer.WriteAsync(_fileContextsCount++);
lock (globalContext)
{
// We need to generate this event on the global logger, though as
// a result this event means 'target enumerated for analysis'
// rather than literally 'we are analyzing the target'.
//
// This call needs to be protected with a lock as the actual
// logging occurs on a separated thread.
globalContext.Logger.AnalyzingTarget(fileContext);
}

bool added = _fileContexts.TryAdd(_fileContextsCount, fileContext);
Debug.Assert(added);

if (_fileContextsCount == 0)
{
DriverEventSource.Log.FirstArtifactQueued(fileContext.CurrentTarget.Uri.GetFilePath());
}

// TBD get all skipped artifacts.
await readyToScanChannel.Writer.WriteAsync(_fileContextsCount++);

return true;
}

/// <summary>
/// Walks the artifacts exposed by <c>globalContext.TargetsProvider</c> and passes
/// each one, in enumeration order, to <c>EnumerateArtifact</c>.
/// </summary>
/// <param name="globalContext">
/// Context that supplies the artifact sequence and is forwarded unchanged to
/// every per-artifact call.
/// </param>
/// <returns>
/// Always <c>true</c>; the boolean produced for an individual artifact is not inspected.
/// </returns>
private async Task<bool> EnumerateFilesFromArtifactsProvider(TContext globalContext)
{
    var candidates = globalContext.TargetsProvider.Artifacts;

    foreach (IEnumeratedArtifact candidate in candidates)
    {
        // Each artifact is awaited before the next one is requested from the
        // provider; the per-artifact result is discarded.
        _ = await EnumerateArtifact(candidate, globalContext);
    }

    return true;
}
Expand Down
2 changes: 2 additions & 0 deletions src/Sarif/EnumeratedArtifact.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,12 @@ public byte[] Bytes

private void RetrieveDataFromStream()
{
/*
if (!this.Stream.CanSeek)
{
this.Stream = new PeekableStream(this.Stream, BinarySniffingHeaderSizeBytes);
}
*/

byte[] header = new byte[BinarySniffingHeaderSizeBytes];
int readLength = this.Stream.Read(header, 0, header.Length);
Expand Down
7 changes: 5 additions & 2 deletions src/Sarif/MultithreadedZipArchiveArtifactProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public class MultithreadedZipArchiveArtifactProvider : ArtifactProvider
{
private readonly ZipArchive zipArchive;
private ISet<string> binaryExtensions;
private readonly Uri uri;

public ISet<string> BinaryExtensions
{
Expand All @@ -23,9 +24,10 @@ public ISet<string> BinaryExtensions
set { this.binaryExtensions = value; }
}

public MultithreadedZipArchiveArtifactProvider(ZipArchive zipArchive, IFileSystem fileSystem) : base(fileSystem)
public MultithreadedZipArchiveArtifactProvider(Uri uri, ZipArchive zipArchive, IFileSystem fileSystem) : base(fileSystem)
{
this.zipArchive = zipArchive;
this.uri = uri;
}

public ISet<string> CreateDefaultBinaryExtensionsSet()
Expand Down Expand Up @@ -67,7 +69,8 @@ public override IEnumerable<IEnumeratedArtifact> Artifacts
{
foreach (ZipArchiveEntry entry in this.zipArchive.Entries)
{
yield return new ZipArchiveArtifact(this.zipArchive, entry, BinaryExtensions);
if (entry.FullName.EndsWith("/")) { continue; }
yield return new ZipArchiveArtifact(this.uri, this.zipArchive, entry, BinaryExtensions);
}
}
}
Expand Down
6 changes: 4 additions & 2 deletions src/Sarif/SinglethreadedZipArchiveArtifactProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,17 @@ namespace Microsoft.CodeAnalysis.Sarif
{
public class SinglethreadedZipArchiveArtifactProvider : ArtifactProvider
{
public SinglethreadedZipArchiveArtifactProvider(ZipArchive zipArchive, IFileSystem fileSystem) : base(fileSystem)
public SinglethreadedZipArchiveArtifactProvider(Uri uri, ZipArchive zipArchive, IFileSystem fileSystem) : base(fileSystem)
{
var artifacts = new List<IEnumeratedArtifact>();

foreach (ZipArchiveEntry entry in zipArchive.Entries)
{
if (entry.FullName.EndsWith("/")) { continue; }

var artifact = new EnumeratedArtifact(Sarif.FileSystem.Instance)
{
Uri = new Uri(entry.FullName, UriKind.RelativeOrAbsolute),
Uri = new Uri($"{uri}?{entry.FullName}"),
Stream = entry.Open(),
};

Expand Down
17 changes: 8 additions & 9 deletions src/Sarif/Writers/ConsoleLogger.cs
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,13 @@ public void Log(ReportingDescriptor rule, Result result, int? extensionIndex = n
WriteLineToConsole(GetMessageText(_toolName, physicalLocation?.ArtifactLocation?.Uri, physicalLocation?.Region, result.RuleId, message, result.Kind, result.Level));
}

public static string GetMessageText(
string toolName,
Uri uri,
Region region,
string ruleId,
string message,
ResultKind kind,
FailureLevel level)
public static string GetMessageText(string toolName,
Uri uri,
Region region,
string ruleId,
string message,
ResultKind kind,
FailureLevel level)
{
string path = ConstructPathFromUri(uri);

Expand Down Expand Up @@ -302,7 +301,7 @@ private static string ConstructPathFromUri(Uri uri)
}
}

return path;
return $"{path}{uri?.Query}";
}
}
}
9 changes: 6 additions & 3 deletions src/Sarif/ZipArchiveArtifact.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

namespace Microsoft.CodeAnalysis.Sarif
{

public class ZipArchiveArtifact : IEnumeratedArtifact
{
private readonly ISet<string> binaryExtensions;
Expand All @@ -19,13 +18,17 @@ public class ZipArchiveArtifact : IEnumeratedArtifact
private string contents;
private byte[] bytes;

public ZipArchiveArtifact(ZipArchive archive, ZipArchiveEntry entry, ISet<string> binaryExtensions = null)
public ZipArchiveArtifact(Uri uri,
ZipArchive archive,
ZipArchiveEntry entry,
ISet<string> binaryExtensions = null)
{
this.uri = uri ?? throw new ArgumentNullException(nameof(uri));
this.entry = entry ?? throw new ArgumentNullException(nameof(entry));
this.archive = archive ?? throw new ArgumentNullException(nameof(archive));

this.binaryExtensions = binaryExtensions ?? new HashSet<string>();
this.uri = new Uri(entry.FullName, UriKind.RelativeOrAbsolute);
this.uri = new Uri($"{uri}?path={entry.FullName}");
}

public Uri Uri => this.uri;
Expand Down
Loading