diff --git a/src/Sarif.Driver/Sdk/MultithreadedAnalyzeCommandBase.cs b/src/Sarif.Driver/Sdk/MultithreadedAnalyzeCommandBase.cs index 704dc6291..27f3bf690 100644 --- a/src/Sarif.Driver/Sdk/MultithreadedAnalyzeCommandBase.cs +++ b/src/Sarif.Driver/Sdk/MultithreadedAnalyzeCommandBase.cs @@ -8,6 +8,7 @@ using System.Diagnostics; using System.Diagnostics.Tracing; using System.IO; +using System.IO.Compression; using System.Linq; using System.Net; using System.Net.Http; @@ -648,72 +649,106 @@ private async Task EnumerateTargetsAsync(TContext globalContext) return true; } - private async Task EnumerateFilesFromArtifactsProvider(TContext globalContext) + private async Task EnumerateArtifact(IEnumeratedArtifact artifact, TContext globalContext) { - foreach (IEnumeratedArtifact artifact in globalContext.TargetsProvider.Artifacts) + globalContext.CancellationToken.ThrowIfCancellationRequested(); + + string filePath = artifact.Uri.GetFilePath(); + + if (globalContext.CompiledGlobalFileDenyRegex?.Match(filePath).Success == true) { - globalContext.CancellationToken.ThrowIfCancellationRequested(); + _filesMatchingGlobalFileDenyRegex++; + DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.FilePathDenied, artifact.SizeInBytes.Value, globalContext.GlobalFilePathDenyRegex); - string filePath = artifact.Uri.GetFilePath(); + string reason = $"its file path matched the global file deny regex: {globalContext.GlobalFilePathDenyRegex}"; + Notes.LogFileSkipped(globalContext, filePath, reason); + return false; + } - if (globalContext.CompiledGlobalFileDenyRegex?.Match(filePath).Success == true) - { - _filesMatchingGlobalFileDenyRegex++; - DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.FilePathDenied, artifact.SizeInBytes.Value, globalContext.GlobalFilePathDenyRegex); + string extension = Path.GetExtension(filePath); + if (string.IsNullOrEmpty(artifact.Uri.Query) && + extension.Equals(".zip", StringComparison.OrdinalIgnoreCase)) + { + var context = new TContext(); + context.Policy = globalContext.Policy; + context.Logger = globalContext.Logger; - string reason = $"its file path matched the global file deny regex: {globalContext.GlobalFilePathDenyRegex}"; - Notes.LogFileSkipped(globalContext, filePath, reason); - continue; - } + var uri = new Uri(filePath, UriKind.RelativeOrAbsolute); + ZipArchive archive = null; - if (artifact.SizeInBytes == 0) + try { - DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.EmptyFile, 00, data2: null); - Notes.LogEmptyFileSkipped(globalContext, filePath); - continue; + archive = ZipFile.OpenRead(filePath); } - - if (!IsTargetWithinFileSizeLimit(artifact.SizeInBytes.Value, globalContext.MaxFileSizeInKilobytes)) + catch(InvalidDataException) { - _filesExceedingSizeLimitCount++; - DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.FileExceedsSizeLimits, artifact.SizeInBytes.Value, $"{globalContext.MaxFileSizeInKilobytes}"); - Notes.LogFileExceedingSizeLimitSkipped(globalContext, artifact.Uri.GetFilePath(), artifact.SizeInBytes.Value / 1000); - continue; + // TBD log exception + return false; } - TContext fileContext = CreateScanTargetContext(globalContext); + var artifactProvider = new MultithreadedZipArchiveArtifactProvider(uri, archive, globalContext.FileSystem); + context.TargetsProvider = artifactProvider; + + await EnumerateFilesFromArtifactsProvider(context); + return true; + } - fileContext.Logger = - new CachingLogger(globalContext.FailureLevels, - globalContext.ResultKinds); + filePath = $"{filePath}{artifact.Uri.Query}"; - Debug.Assert(fileContext.Logger != null); - fileContext.CurrentTarget = artifact; - fileContext.CancellationToken = globalContext.CancellationToken; + if (artifact.SizeInBytes == 0) + { + DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.EmptyFile, 00, data2: null); + Notes.LogEmptyFileSkipped(globalContext, filePath); + return true; + } - lock (globalContext) - { - // We need to generate this event on the global logger, though as - // a result this event means 'target enumerated for analysis' - // rather than literally 'we are analyzing the target'. - // - // This call needs to be protected with a lock as the actual - // logging occurs on a separated thread. - globalContext.Logger.AnalyzingTarget(fileContext); - } + if (!IsTargetWithinFileSizeLimit(artifact.SizeInBytes.Value, globalContext.MaxFileSizeInKilobytes)) + { + _filesExceedingSizeLimitCount++; + DriverEventSource.Log.ArtifactNotScanned(filePath, DriverEventNames.FileExceedsSizeLimits, artifact.SizeInBytes.Value, $"{globalContext.MaxFileSizeInKilobytes}"); + Notes.LogFileExceedingSizeLimitSkipped(globalContext, filePath, artifact.SizeInBytes.Value / 1000); + return false; + } - bool added = _fileContexts.TryAdd(_fileContextsCount, fileContext); - Debug.Assert(added); + TContext fileContext = CreateScanTargetContext(globalContext); - if (_fileContextsCount == 0) - { - DriverEventSource.Log.FirstArtifactQueued(fileContext.CurrentTarget.Uri.GetFilePath()); - } + fileContext.Logger = new CachingLogger(globalContext.FailureLevels, + globalContext.ResultKinds); + + Debug.Assert(fileContext.Logger != null); + fileContext.CurrentTarget = artifact; + fileContext.CancellationToken = globalContext.CancellationToken; - await readyToScanChannel.Writer.WriteAsync(_fileContextsCount++); + lock (globalContext) + { + // We need to generate this event on the global logger, though as + // a result this event means 'target enumerated for analysis' + // rather than literally 'we are analyzing the target'. + // + // This call needs to be protected with a lock as the actual + // logging occurs on a separated thread. + globalContext.Logger.AnalyzingTarget(fileContext); + } + + bool added = _fileContexts.TryAdd(_fileContextsCount, fileContext); + Debug.Assert(added); + + if (_fileContextsCount == 0) + { + DriverEventSource.Log.FirstArtifactQueued(fileContext.CurrentTarget.Uri.GetFilePath()); } - // TBD get all skipped artifacts. + await readyToScanChannel.Writer.WriteAsync(_fileContextsCount++); + + return true; + } + + private async Task EnumerateFilesFromArtifactsProvider(TContext globalContext) + { + foreach (IEnumeratedArtifact artifact in globalContext.TargetsProvider.Artifacts) + { + await EnumerateArtifact(artifact, globalContext); + } return true; } diff --git a/src/Sarif/EnumeratedArtifact.cs b/src/Sarif/EnumeratedArtifact.cs index ca37ce503..37295d66a 100644 --- a/src/Sarif/EnumeratedArtifact.cs +++ b/src/Sarif/EnumeratedArtifact.cs @@ -98,10 +98,12 @@ public byte[] Bytes private void RetrieveDataFromStream() { + /* if (!this.Stream.CanSeek) { this.Stream = new PeekableStream(this.Stream, BinarySniffingHeaderSizeBytes); } + */ byte[] header = new byte[BinarySniffingHeaderSizeBytes]; int readLength = this.Stream.Read(header, 0, header.Length); diff --git a/src/Sarif/MultithreadedZipArchiveArtifactProvider.cs b/src/Sarif/MultithreadedZipArchiveArtifactProvider.cs index 4fcb8d6d7..0bdbd5d55 100644 --- a/src/Sarif/MultithreadedZipArchiveArtifactProvider.cs +++ b/src/Sarif/MultithreadedZipArchiveArtifactProvider.cs @@ -11,6 +11,7 @@ public class MultithreadedZipArchiveArtifactProvider : ArtifactProvider { private readonly ZipArchive zipArchive; private ISet binaryExtensions; + private readonly Uri uri; public ISet BinaryExtensions { @@ -23,9 +24,10 @@ public ISet BinaryExtensions set { this.binaryExtensions = value; } } - public MultithreadedZipArchiveArtifactProvider(ZipArchive zipArchive, IFileSystem fileSystem) : base(fileSystem) + public MultithreadedZipArchiveArtifactProvider(Uri uri, ZipArchive zipArchive, IFileSystem fileSystem) : base(fileSystem) { this.zipArchive = zipArchive; + this.uri = uri; } public ISet CreateDefaultBinaryExtensionsSet() @@ -67,7 +69,8 @@ public override IEnumerable Artifacts { foreach (ZipArchiveEntry entry in this.zipArchive.Entries) { - yield return new ZipArchiveArtifact(this.zipArchive, entry, BinaryExtensions); + if (entry.FullName.EndsWith("/")) { continue; } + yield return new ZipArchiveArtifact(this.uri, this.zipArchive, entry, BinaryExtensions); } } } diff --git a/src/Sarif/SinglethreadedZipArchiveArtifactProvider.cs b/src/Sarif/SinglethreadedZipArchiveArtifactProvider.cs index 922034e91..26d4a7b77 100644 --- a/src/Sarif/SinglethreadedZipArchiveArtifactProvider.cs +++ b/src/Sarif/SinglethreadedZipArchiveArtifactProvider.cs @@ -9,15 +9,17 @@ namespace Microsoft.CodeAnalysis.Sarif { public class SinglethreadedZipArchiveArtifactProvider : ArtifactProvider { - public SinglethreadedZipArchiveArtifactProvider(ZipArchive zipArchive, IFileSystem fileSystem) : base(fileSystem) + public SinglethreadedZipArchiveArtifactProvider(Uri uri, ZipArchive zipArchive, IFileSystem fileSystem) : base(fileSystem) { var artifacts = new List(); foreach (ZipArchiveEntry entry in zipArchive.Entries) { + if (entry.FullName.EndsWith("/")) { continue; } + var artifact = new EnumeratedArtifact(Sarif.FileSystem.Instance) { - Uri = new Uri(entry.FullName, UriKind.RelativeOrAbsolute), + Uri = new Uri($"{uri}?{entry.FullName}"), Stream = entry.Open(), }; diff --git a/src/Sarif/Writers/ConsoleLogger.cs b/src/Sarif/Writers/ConsoleLogger.cs index 3dfb343d3..78c578f30 100644 --- a/src/Sarif/Writers/ConsoleLogger.cs +++ b/src/Sarif/Writers/ConsoleLogger.cs @@ -122,14 +122,13 @@ public void Log(ReportingDescriptor rule, Result result, int? extensionIndex = n WriteLineToConsole(GetMessageText(_toolName, physicalLocation?.ArtifactLocation?.Uri, physicalLocation?.Region, result.RuleId, message, result.Kind, result.Level)); } - public static string GetMessageText( - string toolName, - Uri uri, - Region region, - string ruleId, - string message, - ResultKind kind, - FailureLevel level) + public static string GetMessageText(string toolName, + Uri uri, + Region region, + string ruleId, + string message, + ResultKind kind, + FailureLevel level) { string path = ConstructPathFromUri(uri); @@ -302,7 +301,7 @@ private static string ConstructPathFromUri(Uri uri) } } - return path; + return $"{path}{uri?.Query}"; } } } diff --git a/src/Sarif/ZipArchiveArtifact.cs b/src/Sarif/ZipArchiveArtifact.cs index 208b8eec6..6da875dd1 100644 --- a/src/Sarif/ZipArchiveArtifact.cs +++ b/src/Sarif/ZipArchiveArtifact.cs @@ -9,7 +9,6 @@ namespace Microsoft.CodeAnalysis.Sarif { - public class ZipArchiveArtifact : IEnumeratedArtifact { private readonly ISet binaryExtensions; @@ -19,13 +18,17 @@ public class ZipArchiveArtifact : IEnumeratedArtifact private string contents; private byte[] bytes; - public ZipArchiveArtifact(ZipArchive archive, ZipArchiveEntry entry, ISet binaryExtensions = null) + public ZipArchiveArtifact(Uri uri, + ZipArchive archive, + ZipArchiveEntry entry, + ISet binaryExtensions = null) { + this.uri = uri ?? throw new ArgumentNullException(nameof(uri)); this.entry = entry ?? throw new ArgumentNullException(nameof(entry)); this.archive = archive ?? throw new ArgumentNullException(nameof(archive)); this.binaryExtensions = binaryExtensions ?? new HashSet(); - this.uri = new Uri(entry.FullName, UriKind.RelativeOrAbsolute); + this.uri = new Uri($"{uri}?path={entry.FullName}"); } public Uri Uri => this.uri;