diff --git a/src/ReleaseHistory.md b/src/ReleaseHistory.md index dfb1256af..1a4ff1683 100644 --- a/src/ReleaseHistory.md +++ b/src/ReleaseHistory.md @@ -226,4 +226,5 @@ * API BREAKING: remove 'run.architecture' https://github.com/oasis-tcs/sarif-spec/issues/262 * API BREAKING: 'result.message' is now a required property https://github.com/oasis-tcs/sarif-spec/issues/283 * API BREAKING: rename 'tool.fileVersion' to 'tool.dottedQuadFileVersion' https://github.com/oasis-tcs/sarif-spec/issues/274 -* API BREAKING: remove 'open' from valid rule default configuration levels. https://github.com/oasis-tcs/sarif-spec/issues/288. The transformer remaps this value to 'note'. \ No newline at end of file +* API BREAKING: remove 'open' from valid rule default configuration levels. https://github.com/oasis-tcs/sarif-spec/issues/288. The transformer remaps this value to 'note'. +* API BREAKING: 'run.columnKind' default value is now 'unicodeCodePoints'. https://github.com/Microsoft/sarif-sdk/pull/1160. The transformer will inject 'utf16CodeUnits', however, when this property is absent, as this value is a more appropriate default for the Windows platform. \ No newline at end of file diff --git a/src/Sarif.FunctionalTests/PropertyBagConverterTests.cs b/src/Sarif.FunctionalTests/PropertyBagConverterTests.cs index 54e4c5b8f..79350034f 100644 --- a/src/Sarif.FunctionalTests/PropertyBagConverterTests.cs +++ b/src/Sarif.FunctionalTests/PropertyBagConverterTests.cs @@ -23,6 +23,7 @@ public void PropertyBagConverter_RoundTripsStringPropertyWithEscapedCharacters() ""tool"": { ""name"": ""CodeScanner"" }, + ""columnKind"": ""utf16CodeUnits"", ""properties"": { ""int"": 42, ""string"": ""'\""\\'"" diff --git a/src/Sarif.FunctionalTests/UpdateBaselines.ps1 b/src/Sarif.FunctionalTests/UpdateBaselines.ps1 index 11a503abc..c90a4f496 100644 --- a/src/Sarif.FunctionalTests/UpdateBaselines.ps1 +++ b/src/Sarif.FunctionalTests/UpdateBaselines.ps1 @@ -56,10 +56,10 @@ function Build-Baselines($toolName) $toolDirectory = Join-Path "$PSScriptRoot\v2\ConverterTestData" $toolName $sourceExtension = "*.$sourceExtension" Get-ChildItem $toolDirectory -Filter $sourceExtension | ForEach-Object { - Write-Host " $_ -> $_.sarif" $input = $_.FullName $output = "$input.sarif" $outputTemp = "$output.temp" + Write-Host "$utility convert "$input" --tool $toolName --output "$outputTemp" --pretty-print" # Actually run the converter Remove-Item $outputTemp -ErrorAction SilentlyContinue diff --git a/src/Sarif.FunctionalTests/v2/SpecExamples/Comprehensive.sarif b/src/Sarif.FunctionalTests/v2/SpecExamples/Comprehensive.sarif index a542c3628..851e27858 100644 --- a/src/Sarif.FunctionalTests/v2/SpecExamples/Comprehensive.sarif +++ b/src/Sarif.FunctionalTests/v2/SpecExamples/Comprehensive.sarif @@ -8,6 +8,7 @@ "baselineInstanceGuid": "0A106451-C9B1-4309-A7EE-06988B95F723", "automationLogicalId": "Build-14.0.1.2-Release-20160716-13:22:18", "architecture": "x86", + "columnKind": "utf16CodeUnits", "tool": { "name": "CodeScanner", "fullName": "CodeScanner 1.1 for Unix (en-US)", diff --git a/src/Sarif.UnitTests/Core/RunTests.cs b/src/Sarif.UnitTests/Core/RunTests.cs new file mode 100644 index 000000000..67c3f9b78 --- /dev/null +++ b/src/Sarif.UnitTests/Core/RunTests.cs @@ -0,0 +1,47 @@ +// Copyright (c) Microsoft. All Rights Reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using Xunit; +using Newtonsoft.Json; +using FluentAssertions; + +namespace Microsoft.CodeAnalysis.Sarif +{ + public class RunTests + { + [Fact] + public void Run_ColumnKindSerializesProperly() + { + // In our Windows-specific SDK, if no one has explicitly set ColumnKind, we + // will set it to the windows-specific value of Utf16CodeUnits. Otherwise, + // the SARIF file will pick up the ColumnKind default value of + // UnicodeCodePoints, which is not appropriate for Windows frameworks. + RoundTripColumnKind(persistedValue: ColumnKind.None, expectedRoundTrippedValue: ColumnKind.Utf16CodeUnits); + + // When explicitly set, we should always preserve that setting + RoundTripColumnKind(persistedValue: ColumnKind.Utf16CodeUnits, expectedRoundTrippedValue: ColumnKind.Utf16CodeUnits); + RoundTripColumnKind(persistedValue: ColumnKind.UnicodeCodePoints, expectedRoundTrippedValue: ColumnKind.UnicodeCodePoints); + } + + private void RoundTripColumnKind(ColumnKind persistedValue, ColumnKind expectedRoundTrippedValue) + { + var sarifLog = new SarifLog + { + Runs = new[] + { + new Run + { + Tool = new Tool { Name = "Test tool"}, + ColumnKind = persistedValue + } + } + }; + + string json = JsonConvert.SerializeObject(sarifLog); + + sarifLog = JsonConvert.DeserializeObject(json); + sarifLog.Runs[0].ColumnKind.Should().Be(expectedRoundTrippedValue); + } + } +} diff --git a/src/Sarif.UnitTests/TestData/SarifVersionOneToCurrentVisitor/v1/RestoreFromPropertyBag.sarif b/src/Sarif.UnitTests/TestData/SarifVersionOneToCurrentVisitor/v1/RestoreFromPropertyBag.sarif index 9184ae0f3..9ffcac26e 100644 --- a/src/Sarif.UnitTests/TestData/SarifVersionOneToCurrentVisitor/v1/RestoreFromPropertyBag.sarif +++ b/src/Sarif.UnitTests/TestData/SarifVersionOneToCurrentVisitor/v1/RestoreFromPropertyBag.sarif @@ -9,7 +9,7 @@ }, "results": [], "properties": { - "sarifv2/run": {"tool":{"name":"CodeScanner","semanticVersion":"2.1.0"},"results":[]} + "sarifv2/run": {"tool":{"name":"CodeScanner","semanticVersion":"2.1.0"},"results":[],"columnKind":"utf16CodeUnits"} } } ] diff --git a/src/Sarif/Autogenerated/Run.cs b/src/Sarif/Autogenerated/Run.cs index 0caaf7a62..a712e9e96 100644 --- a/src/Sarif/Autogenerated/Run.cs +++ b/src/Sarif/Autogenerated/Run.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using System.Runtime.Serialization; using Microsoft.CodeAnalysis.Sarif.Readers; +using Newtonsoft.Json; namespace Microsoft.CodeAnalysis.Sarif { @@ -138,6 +139,7 @@ public SarifNodeKind SarifNodeKind /// Specifies the unit in which the tool measures columns. /// [DataMember(Name = "columnKind", IsRequired = false, EmitDefaultValue = false)] + [JsonConverter(typeof(EnumConverter))] public ColumnKind ColumnKind { get; set; } /// diff --git a/src/Sarif/CodeGenHints.json b/src/Sarif/CodeGenHints.json index 2063c51a4..437848a5d 100644 --- a/src/Sarif/CodeGenHints.json +++ b/src/Sarif/CodeGenHints.json @@ -656,6 +656,14 @@ "unicodeCodePoints" ] } + }, + { + "kind": "AttributeHint", + "arguments": { + "namespaceName": "Newtonsoft.Json", + "typeName": "JsonConverter", + "arguments": [ "typeof(EnumConverter)" ] + } } ], "Run.Files": [ @@ -738,7 +746,6 @@ "arguments": [ "typeof(Microsoft.CodeAnalysis.Sarif.Readers.SarifVersionConverter)" ] } } - ], "stack": [ { diff --git a/src/Sarif/Core/Run.cs b/src/Sarif/Core/Run.cs index a3d7b94ba..0ac29664d 100644 --- a/src/Sarif/Core/Run.cs +++ b/src/Sarif/Core/Run.cs @@ -13,6 +13,25 @@ public partial class Run private static Invocation EmptyInvocation = new Invocation(); private static LogicalLocation EmptyLogicalLocation = new LogicalLocation(); + public bool ShouldSerializeColumnKind() + { + // This serialization helper does two things. + // + // First, if ColumnKind has not been + // explicitly set, we will set it to the value that works for the Microsoft + // platform (which is not the specified SARIF default). This makes sure that + // the value is set appropriate for code running on the Microsoft platform, + // even if the SARIF producer is not aware of this rather obscure value. + if (this.ColumnKind == ColumnKind.None) + { + this.ColumnKind = ColumnKind.Utf16CodeUnits; + } + + // Second, we will always explicitly serialize this value. Otherwise, we can't easily + // distinguish between earlier versions of the format for which this property was typically absent. + return true; + } + public bool ShouldSerializeFiles() { return this.Files != null && this.Files.Values.Any(); } public bool ShouldSerializeGraphs() { return this.Graphs != null && this.Graphs.Values.Any(); } diff --git a/src/Sarif/Readers/EnumConverter.cs b/src/Sarif/Readers/EnumConverter.cs index cbe93c4f6..718eb6709 100644 --- a/src/Sarif/Readers/EnumConverter.cs +++ b/src/Sarif/Readers/EnumConverter.cs @@ -21,7 +21,6 @@ public override object ReadJson(JsonReader reader, Type objectType, object exist { throw new ArgumentNullException(nameof(reader)); } - string value = (string)reader.Value; return Enum.Parse(objectType, ConvertToPascalCase(value)); } diff --git a/src/Sarif/Schemata/sarif-schema.json b/src/Sarif/Schemata/sarif-schema.json index 8e2e689f8..a837dd536 100644 --- a/src/Sarif/Schemata/sarif-schema.json +++ b/src/Sarif/Schemata/sarif-schema.json @@ -1585,7 +1585,8 @@ "columnKind": { "description": "Specifies the unit in which the tool measures columns.", - "enum": [ "utf16CodeUnits", "unicodeCodePoints" ] + "enum": [ "utf16CodeUnits", "unicodeCodePoints" ], + "default": "unicodeCodePoints" }, "properties": { diff --git a/src/Sarif/Visitors/SarifVersionOneToCurrentVisitor.cs b/src/Sarif/Visitors/SarifVersionOneToCurrentVisitor.cs index 7db286186..22e576ae4 100644 --- a/src/Sarif/Visitors/SarifVersionOneToCurrentVisitor.cs +++ b/src/Sarif/Visitors/SarifVersionOneToCurrentVisitor.cs @@ -917,7 +917,8 @@ internal Run CreateRun(RunVersionOne v1Run) AggregateIds = aggregateIds, BaselineInstanceGuid = v1Run.BaselineId, Properties = v1Run.Properties, - Tool = CreateTool(v1Run.Tool) + Tool = CreateTool(v1Run.Tool), + ColumnKind = ColumnKind.Utf16CodeUnits }; _currentRun = run; diff --git a/src/Sarif/Writers/PrereleaseCompatibilityTransformer.cs b/src/Sarif/Writers/PrereleaseCompatibilityTransformer.cs index 877c77cdd..c08ab6f74 100644 --- a/src/Sarif/Writers/PrereleaseCompatibilityTransformer.cs +++ b/src/Sarif/Writers/PrereleaseCompatibilityTransformer.cs @@ -122,6 +122,16 @@ private static bool ApplyChangesFromTC25ThroughTC28(JObject sarifLog) JObject resources = (JObject)run["resources"]; modifiedLog |= RemapRuleDefaultLevelFromOpenToNote(resources); + + // Specify columnKind as ColumndKind.Utf16CodeUnits in cases where the enum is missing from + // the SARIF file. Moving forward, the absence of this enum will be interpreted as + // the new default, which is ColumnKind.UnicodeCodePoints. + // https://github.com/oasis-tcs/sarif-spec/issues/188 + JValue columnKind = (JValue)run["columnKind"]; + if (columnKind == null) + { + run["columnKind"] = "utf16CodeUnits"; + } } } diff --git a/src/Sarif/Writers/ResultLogJsonWriter.cs b/src/Sarif/Writers/ResultLogJsonWriter.cs index 11346cc37..0af07a32d 100644 --- a/src/Sarif/Writers/ResultLogJsonWriter.cs +++ b/src/Sarif/Writers/ResultLogJsonWriter.cs @@ -63,6 +63,13 @@ public void Initialize(Run run) throw new ArgumentNullException(nameof(run.Tool)); } + // For this Windows-relevant SDK, if the column kind + // isn't explicitly specified, we will set it to Utf16CodeUnits + if (run.ColumnKind == ColumnKind.None) + { + run.ColumnKind = ColumnKind.Utf16CodeUnits; + } + this.EnsureStateNotAlreadySet(Conditions.Disposed | Conditions.RunInitialized); SarifVersion sarifVersion = SarifVersion.Current;