From 0177c03340f1857ce6db4cfb2918bd68a9fbefe7 Mon Sep 17 00:00:00 2001 From: Eddy Nakamura Date: Mon, 14 Feb 2022 12:45:03 -0800 Subject: [PATCH] Fix `InvalidOperationException` when `--hashes` is enabled and using `MultithreadedAnalyzeCommandBase`. (#2447) * Simplifying multithreaded analysis when hashes are the same * Test ideas * Removing unused field and fixing test * Fixing all tests based on testrule errorcount, improving hashutilities to use FileSystem * Addressing PR feedback * Reducing to 50 iterations * Updating release history and updating style * Addressing PR feedback Co-authored-by: Michael C. Fanning --- src/ReleaseHistory.md | 1 + .../Sdk/MultithreadedAnalyzeCommandBase.cs | 34 +++------ src/Sarif.Driver/Sdk/PluginDriverCommand.cs | 3 +- src/Sarif/HashUtilities.cs | 6 +- .../Sdk/AnalyzeCommandBaseTests.cs | 71 +++++++++++++++++-- src/Test.UnitTests.Sarif.Driver/TestRule.cs | 31 +++++--- 6 files changed, 106 insertions(+), 40 deletions(-) diff --git a/src/ReleaseHistory.md b/src/ReleaseHistory.md index 3441bbe16..170ff397a 100644 --- a/src/ReleaseHistory.md +++ b/src/ReleaseHistory.md @@ -7,6 +7,7 @@ * BREAKING: Fix `InvalidOperationException` when using PropertiesDictionary in a multithreaded application, and remove `[Serializable]` from it. Now use of BinaryFormatter on it will result in `SerializationException`: Type `PropertiesDictionary` is not marked as serializable. [#2415](https://github.com/microsoft/sarif-sdk/pull/2415) * BREAKING: `SarifLogger` now emits an artifacts table entry if `artifactLocation` is not null for tool configuration and tool execution notifications. [#2437](https://github.com/microsoft/sarif-sdk/pull/2437) * BUGFIX: Fix `ArgumentException` when `--recurse` is enabled and two file target specifiers generates the same file path. [#2438](https://github.com/microsoft/sarif-sdk/pull/2438) +* BUGFIX: Fix 'InvalidOperationException' with message `Collection was modified; enumeration operation may not execute` in `MultithreadedAnalyzeCommandBase`, which is raised when analyzing with the `--hashes` switch. [#2447](https://github.com/microsoft/sarif-sdk/pull/2447) ## **v2.4.12** [Sdk](https://www.nuget.org/packages/Sarif.Sdk/2.4.12) | [Driver](https://www.nuget.org/packages/Sarif.Driver/2.4.12) | [Converters](https://www.nuget.org/packages/Sarif.Converters/2.4.12) | [Multitool](https://www.nuget.org/packages/Sarif.Multitool/2.4.12) | [Multitool Library](https://www.nuget.org/packages/Sarif.Multitool.Library/2.4.12) diff --git a/src/Sarif.Driver/Sdk/MultithreadedAnalyzeCommandBase.cs b/src/Sarif.Driver/Sdk/MultithreadedAnalyzeCommandBase.cs index 5777209ad..c0bbbca93 100644 --- a/src/Sarif.Driver/Sdk/MultithreadedAnalyzeCommandBase.cs +++ b/src/Sarif.Driver/Sdk/MultithreadedAnalyzeCommandBase.cs @@ -39,7 +39,6 @@ public abstract class MultithreadedAnalyzeCommandBase : Plug private OptionallyEmittedData _dataToInsert; private Channel _resultsWritingChannel; private Channel _fileEnumerationChannel; - private Dictionary> _hashToFilesMap; private IDictionary _pathToHashDataMap; private ConcurrentDictionary _fileContexts; @@ -186,9 +185,9 @@ private void MultithreadedAnalyzeTargets(TOptions options, IEnumerable> skimmers, ISet disabledSkimmers) { - options.Threads = options.Threads > 0 ? - options.Threads : - (Debugger.IsAttached) ? 1 : Environment.ProcessorCount; + options.Threads = options.Threads > 0 + ? options.Threads + : (Debugger.IsAttached) ? 1 : Environment.ProcessorCount; var channelOptions = new BoundedChannelOptions(2000) { @@ -470,23 +469,11 @@ private async Task HashAsync() { if (_computeHashes) { - if (_hashToFilesMap == null) - { - _hashToFilesMap = new Dictionary>(); - } - TContext context = _fileContexts[index]; string localPath = context.TargetUri.LocalPath; - HashData hashData = HashUtilities.ComputeHashes(localPath); - - if (!_hashToFilesMap.TryGetValue(hashData.Sha256, out List paths)) - { - paths = new List(); - _hashToFilesMap[hashData.Sha256] = paths; - } + HashData hashData = HashUtilities.ComputeHashes(localPath, FileSystem); - paths.Add(localPath); context.Hashes = hashData; if (_pathToHashDataMap != null && !_pathToHashDataMap.ContainsKey(localPath)) @@ -592,7 +579,7 @@ protected virtual TContext CreateContext(TOptions options, if (filePath != null) { - context.TargetUri = new Uri(filePath); + context.TargetUri = new Uri(filePath, UriKind.RelativeOrAbsolute); } return context; @@ -873,11 +860,9 @@ protected virtual TContext DetermineApplicabilityAndAnalyze( IAnalysisLogger logger = context.Logger; - int numberOfFiles = 1; if (_computeHashes) { - numberOfFiles = _hashToFilesMap[context.Hashes.Sha256].Count; - if (numberOfFiles > 1 && _analysisLoggerCache.ContainsKey(context.Hashes.Sha256)) + if (_analysisLoggerCache.ContainsKey(context.Hashes.Sha256)) { return context; } @@ -885,9 +870,12 @@ protected virtual TContext DetermineApplicabilityAndAnalyze( context.Logger.AnalyzingTarget(context); - if (_computeHashes && numberOfFiles > 1) + if (_computeHashes) { - _analysisLoggerCache[context.Hashes.Sha256] = logger; + if (!_analysisLoggerCache.TryAdd(context.Hashes.Sha256, logger)) + { + return context; + } } IEnumerable> applicableSkimmers = DetermineApplicabilityForTarget(context, skimmers, disabledSkimmers); diff --git a/src/Sarif.Driver/Sdk/PluginDriverCommand.cs b/src/Sarif.Driver/Sdk/PluginDriverCommand.cs index a664552bd..5b8122111 100644 --- a/src/Sarif.Driver/Sdk/PluginDriverCommand.cs +++ b/src/Sarif.Driver/Sdk/PluginDriverCommand.cs @@ -112,7 +112,8 @@ internal bool ValidateOutputFileCanBeCreated(IAnalysisContext context, string ou { bool succeeded = true; - if (!DriverUtilities.CanCreateOutputFile(outputFilePath, force, FileSystem)) + if (!string.IsNullOrWhiteSpace(outputFilePath) && + !DriverUtilities.CanCreateOutputFile(outputFilePath, force, FileSystem)) { Errors.LogOutputFileAlreadyExists(context, outputFilePath); succeeded = false; diff --git a/src/Sarif/HashUtilities.cs b/src/Sarif/HashUtilities.cs index 325a50e66..3e8ca9434 100644 --- a/src/Sarif/HashUtilities.cs +++ b/src/Sarif/HashUtilities.cs @@ -69,11 +69,13 @@ public static IDictionary MultithreadedComputeTargetFileHashes [SuppressMessage("Microsoft.Security.Cryptography", "CA5354:SHA1CannotBeUsed")] [SuppressMessage("Microsoft.Security.Cryptography", "CA5350:MD5CannotBeUsed")] - public static HashData ComputeHashes(string fileName) + public static HashData ComputeHashes(string fileName, IFileSystem fileSystem = null) { + fileSystem ??= FileSystem; + try { - using (Stream stream = FileSystem.FileOpenRead(fileName)) + using (Stream stream = fileSystem.FileOpenRead(fileName)) { using (var bufferedStream = new BufferedStream(stream, 1024 * 32)) { diff --git a/src/Test.UnitTests.Sarif.Driver/Sdk/AnalyzeCommandBaseTests.cs b/src/Test.UnitTests.Sarif.Driver/Sdk/AnalyzeCommandBaseTests.cs index 75051c069..8e5d01ba7 100644 --- a/src/Test.UnitTests.Sarif.Driver/Sdk/AnalyzeCommandBaseTests.cs +++ b/src/Test.UnitTests.Sarif.Driver/Sdk/AnalyzeCommandBaseTests.cs @@ -661,7 +661,7 @@ public void AnalyzeCommandBase_DefaultEndToEndAnalysis() // As configured by injected TestRuleBehaviors, we should // see an error per scan target (one file in this case). - resultCount.Should().Be(1); + resultCount.Should().Be((int)TestRule.ErrorsCount.DefaultValue()); run.Results[0].Kind.Should().Be(ResultKind.Fail); toolNotificationCount.Should().Be(0); @@ -676,6 +676,65 @@ public void AnalyzeCommandBase_EndToEndAnalysisWithPostUri() PostUriTestHelper(@"https://host.does.not.exist", TestAnalyzeCommand.FAILURE, RuntimeConditions.ExceptionPostingLogFile); } + [Fact] + public void MultithreadedAnalyzeCommandBase_EndToEndMultithreadedAnalysis() + { + string specifier = "*.xyz"; + + int filesCount = 10; + var files = new List(); + for (int i = 0; i < filesCount; i++) + { + files.Add(Path.GetFullPath($@".\File{i}.txt")); + } + + var propertiesDictionary = new PropertiesDictionary(); + propertiesDictionary.SetProperty(TestRule.ErrorsCount, (uint)15); + propertiesDictionary.SetProperty(TestRule.Behaviors, TestRuleBehaviors.LogError); + + using var tempFile = new TempFile(".xml"); + propertiesDictionary.SaveToXml(tempFile.Name); + + var mockStream = new Mock(); + mockStream.Setup(m => m.CanRead).Returns(true); + mockStream.Setup(m => m.CanSeek).Returns(true); + mockStream.Setup(m => m.ReadByte()).Returns('a'); + + var mockFileSystem = new Mock(); + mockFileSystem.Setup(x => x.DirectoryExists(It.IsAny())).Returns(true); + mockFileSystem.Setup(x => x.DirectoryGetFiles(It.IsAny(), specifier)).Returns(files); + mockFileSystem.Setup(x => x.FileExists(It.Is(s => s.EndsWith(specifier)))).Returns(true); + mockFileSystem.Setup(x => x.DirectoryEnumerateFiles(It.IsAny(), + It.IsAny(), + It.IsAny())).Returns(files); + mockFileSystem.Setup(x => x.FileOpenRead(It.IsAny())).Returns(mockStream.Object); + mockFileSystem.Setup(x => x.FileExists(tempFile.Name)).Returns(true); + + Output.WriteLine($"The seed that will be used is: {TestRule.s_seed}"); + + for (int i = 0; i < 50; i++) + { + var options = new TestAnalyzeOptions + { + Threads = 10, + TargetFileSpecifiers = new[] { specifier }, + SarifOutputVersion = SarifVersion.Current, + TestRuleBehaviors = TestRuleBehaviors.LogError, + DataToInsert = new[] { OptionallyEmittedData.Hashes }, + ConfigurationFilePath = tempFile.Name + }; + + var command = new TestMultithreadedAnalyzeCommand(mockFileSystem.Object); + command.DefaultPluginAssemblies = new Assembly[] { this.GetType().Assembly }; + + int result = command.Run(options); + + command.ExecutionException?.InnerException.Should().BeNull(); + + result.Should().Be(CommandBase.SUCCESS, $"Iteration: {i}, Seed: {TestRule.s_seed}"); + } + } + [Fact] public void AnalyzeCommandBase_PersistsSarifOneZeroZero() { @@ -739,7 +798,7 @@ public void AnalyzeCommandBase_RunDefaultRules() // As configured by the inject TestRuleBehaviors value, we should see // an error for every scan target (of which there is one file in this test). - resultCount.Should().Be(1); + resultCount.Should().Be((int)TestRule.ErrorsCount.DefaultValue()); run.Results[0].Level.Should().Be(FailureLevel.Error); toolNotificationCount.Should().Be(0); @@ -774,8 +833,8 @@ public void AnalyzeCommandBase_FireAllRules() (configurationNotification) => { configurationNotificationCount++; }); // As configured by context, we should see a single error raised. - resultCount.Should().Be(1); - run.Results.Count((result) => result.Level == FailureLevel.Error).Should().Be(1); + resultCount.Should().Be((int)TestRule.ErrorsCount.DefaultValue()); + run.Results.Count((result) => result.Level == FailureLevel.Error).Should().Be((int)TestRule.ErrorsCount.DefaultValue()); toolNotificationCount.Should().Be(0); configurationNotificationCount.Should().Be(0); @@ -1370,7 +1429,7 @@ public void AnalyzeCommandBase_MultithreadedShouldUseCacheIfFilesAreTheSame() generateSameOutput: false, expectedResultCode: 0, expectedResultCount: 7, - expectedCacheSize: 0); + expectedCacheSize: 20); } private static readonly IList ComprehensiveKindAndLevelsByFileName = new List @@ -1777,7 +1836,7 @@ private void PostUriTestHelper(string postUri, int expectedReturnCode, RuntimeCo // As configured by injected TestRuleBehaviors, we should // see an error per scan target (one file in this case). - resultCount.Should().Be(1); + resultCount.Should().Be((int)TestRule.ErrorsCount.DefaultValue()); run.Results[0].Kind.Should().Be(ResultKind.Fail); toolNotificationCount.Should().Be(0); diff --git a/src/Test.UnitTests.Sarif.Driver/TestRule.cs b/src/Test.UnitTests.Sarif.Driver/TestRule.cs index a7d5e686c..9d51cfbad 100644 --- a/src/Test.UnitTests.Sarif.Driver/TestRule.cs +++ b/src/Test.UnitTests.Sarif.Driver/TestRule.cs @@ -7,6 +7,7 @@ using System.IO; using System.Reflection; using System.Resources; +using System.Threading; using FluentAssertions; @@ -22,6 +23,9 @@ namespace Microsoft.CodeAnalysis.Sarif [Export(typeof(ReportingDescriptor)), Export(typeof(IOptionsProvider)), Export(typeof(Skimmer))] internal class TestRule : TestRuleBase, IOptionsProvider { + internal static int s_seed = (int)DateTime.UtcNow.Ticks; + internal static Random s_random = new Random(s_seed); + [ThreadStatic] internal static TestRuleBehaviors s_testRuleBehaviors; @@ -169,13 +173,20 @@ public override void Analyze(TestAnalysisContext context) case TestRuleBehaviors.LogError: { - context.Logger.Log(this, - new Result - { - RuleId = this.Id, - Level = FailureLevel.Error, - Message = new Message { Text = "Simple test rule message." } - }); + uint errorsCount = context.Policy.GetProperty(ErrorsCount); + + for (uint i = 0; i < errorsCount; i++) + { + context.Logger.Log(this, + new Result + { + RuleId = this.Id, + Level = FailureLevel.Error, + Message = new Message { Text = "Simple test rule message." } + }); + + Thread.Sleep(s_random.Next(0, 10)); + } break; } @@ -245,11 +256,15 @@ public static void RaiseExceptionViaReflection() public IEnumerable GetOptions() { - return new IOption[] { Behaviors, UnusedOption }; + return new IOption[] { Behaviors, UnusedOption, ErrorsCount }; } private const string AnalyzerName = TestRuleId + "." + nameof(TestRule); + public static PerLanguageOption ErrorsCount { get; } = + new PerLanguageOption( + AnalyzerName, nameof(ErrorsCount), defaultValue: () => { return 1; }); + public static PerLanguageOption Behaviors { get; } = new PerLanguageOption( AnalyzerName, nameof(TestRuleBehaviors), defaultValue: () => { return TestRuleBehaviors.None; });