-
Notifications
You must be signed in to change notification settings - Fork 93
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Get v2 -> v1 transform working with files array #1211
Changes from 8 commits
7139747
f79fe68
622c2ab
d5f9620
1c9c6a0
c16f1f4
e3a51ca
718a420
43a85cd
49ebc34
3fb517d
ee8c807
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,7 +27,7 @@ | |
} | ||
}, | ||
"file:///home/buildAgent/bin/app.zip#/docs/intro.docx": { | ||
"uri": "file:///docs/intro.docx", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I fixed this existing bug in the transformer. #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes good catch, can you please add another test to this file that nests a second zip in this one, and another file object that is a nested file within that nested zip? this guarantees completeness in nested files testing. In reply to: 247374942 [](ancestors = 247374942) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
"uri": "/docs/intro.docx", | ||
"parentKey": "file:///home/buildAgent/bin/app.zip", | ||
"offset": 17522, | ||
"length": 4050, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,16 +11,13 @@ | |
"invocation": {}, | ||
"files": { | ||
"file:///src/base/driver.cs": { | ||
"uri": "file:///src/base/driver.cs", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, the transformer now doesn't bother to emit these. #ByDesign |
||
"contents": "YQBiAGMAZAANAAoAZQBmAGcADQAKAGgAaQBqAGsADQAKAGwAbQBuAA==" | ||
}, | ||
"file:///src/ui/client.cs": { | ||
"uri": "file:///src/ui/client.cs", | ||
"mimeType": "text/x-csharp", | ||
"contents": "VGhlIHF1aWNrIGJyb3duIGZveA0KanVtcHMgb3ZlciB0aGUgbGFhenkgZG9n" | ||
}, | ||
"file:///src/ui/unicodeText.cs": { | ||
"uri": "file:///src/ui/unicodeText.cs", | ||
"mimeType": "text/x-csharp", | ||
"contents": "VGhlIHF1aWNrIGJyb3duIGZveA0KanVtcHMgb3ZlciB0aGUgbGFhenkgZG9n" | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ | |
using Xunit; | ||
using Xunit.Abstractions; | ||
|
||
namespace Microsoft.CodeAnalysis.Sarif.UnitTests.Transformers | ||
namespace Microsoft.CodeAnalysis.Sarif.UnitTests.Visitors | ||
{ | ||
public class SarifCurrentToVersionOneVisitorTests : FileDiffingTests | ||
{ | ||
|
@@ -69,9 +69,7 @@ private void VerifyCurrentToVersionOneTransformationFromResource(string v2InputR | |
File.WriteAllText(expectedFilePath, v1ExpectedLogText); | ||
File.WriteAllText(actualFilePath, v1ActualLogText); | ||
|
||
|
||
string errorMessage = string.Format(@"V2 conversion from V1 produced unexpected diffs for test: '{0}'.", v2InputResourceName); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This variable was never used, so this message never appeared. #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok. this code needs to be converted to actually use the helpers in the filediff base class. maybe good for a followon change. In reply to: 247374990 [](ancestors = 247374990) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done! This SarifCurrentToVersionOneVisitorTests.cs is now 65 lines long :-) Take a look at the changes to FileDiffingTests.cs to see how it works. In reply to: 247582997 [](ancestors = 247582997,247377138,247374990) |
||
|
||
sb.AppendLine($"Conversion from current to V1 produced unexpected diffs for test: '{v2InputResourceName}'."); | ||
sb.AppendLine("To compare all difference for this test suite:"); | ||
sb.AppendLine(GenerateDiffCommand("SarifCurrentToVersionOneVisitor", Path.GetDirectoryName(expectedFilePath), Path.GetDirectoryName(actualFilePath)) + Environment.NewLine); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,10 @@ public class SarifCurrentToVersionOneVisitor : SarifRewritingVisitor | |
private Run _currentV2Run = null; | ||
private RunVersionOne _currentRun = null; | ||
|
||
// To understand the purpose of these fields, see the comment on CreateFileKeyIndexMappings. | ||
private IDictionary<string, int> _fileKeyToIndexDictionary; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
can you specify the version-ness of these things in the name? e.g, v1FileKeytoV2Index dictionary? Helps with clarity. For both members please. #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
private IDictionary<int, string> _fileIndexToKeyDictionary; | ||
|
||
public bool EmbedVersionTwoContentInPropertyBag { get; set; } | ||
|
||
public SarifLogVersionOne SarifLogVersionOne { get; private set; } | ||
|
@@ -148,15 +152,15 @@ private string GetFileEncodingName(Uri uri) | |
{ | ||
string encodingName = null; | ||
IList<FileData> files = _currentV2Run.Files; | ||
#if FILES_ARRAY_WORKS | ||
FileData fileData; | ||
|
||
if (uri != null && | ||
files != null && | ||
files.TryGetValue(uri.OriginalString, out fileData)) | ||
files != null | ||
&& _fileKeyToIndexDictionary != null | ||
&& _fileKeyToIndexDictionary.TryGetValue(uri.OriginalString, out int index)) | ||
{ | ||
encodingName = fileData.Encoding; | ||
encodingName = files[index].Encoding; | ||
} | ||
#endif | ||
|
||
return encodingName; | ||
} | ||
|
||
|
@@ -173,21 +177,24 @@ private Encoding GetFileEncoding(string encodingName) | |
return encoding; | ||
} | ||
|
||
internal FileDataVersionOne CreateFileData(FileData v2FileData) | ||
private FileDataVersionOne CreateFileData(FileData v2FileData) | ||
{ | ||
FileDataVersionOne fileData = null; | ||
|
||
if (v2FileData != null) | ||
{ | ||
int parentIndex = v2FileData.ParentIndex; | ||
string parentKey = parentIndex == -1 | ||
? null | ||
: _fileIndexToKeyDictionary?[parentIndex]; | ||
|
||
fileData = new FileDataVersionOne | ||
{ | ||
Hashes = CreateHashVersionOneListFromV2Hashes(v2FileData.Hashes), | ||
Length = v2FileData.Length, | ||
MimeType = v2FileData.MimeType, | ||
Offset = v2FileData.Offset, | ||
#if FILES_ARRAY_WORKS | ||
ParentKey = v2FileData.ParentKey, | ||
#endif | ||
ParentKey = parentKey, | ||
Properties = v2FileData.Properties, | ||
Uri = v2FileData.FileLocation?.Uri, | ||
UriBaseId = v2FileData.FileLocation?.UriBaseId | ||
|
@@ -496,9 +503,8 @@ internal RegionVersionOne CreateRegion(Region v2Region, Uri uri) | |
private int ConvertCharOffsetToByteOffset(int charOffset, Uri uri) | ||
{ | ||
int byteOffset = 0; | ||
Encoding encoding; | ||
|
||
using (StreamReader reader = GetFileStreamReader(uri, out encoding)) | ||
using (StreamReader reader = GetFileStreamReader(uri, out Encoding encoding)) | ||
{ | ||
if (reader != null) | ||
{ | ||
|
@@ -518,9 +524,8 @@ private int ConvertCharOffsetToByteOffset(int charOffset, Uri uri) | |
private int GetRegionByteLength(Region v2Region, Uri uri) | ||
{ | ||
int byteLength = 0; | ||
Encoding encoding; | ||
|
||
using (StreamReader reader = GetFileStreamReader(uri, out encoding)) | ||
using (StreamReader reader = GetFileStreamReader(uri, out Encoding encoding)) | ||
{ | ||
if (reader != null) | ||
{ | ||
|
@@ -572,9 +577,8 @@ private int GetRegionByteLength(Region v2Region, Uri uri) | |
private int GetRegionEndColumn(Region v2Region, Uri uri) | ||
{ | ||
int endColumn = 0; | ||
Encoding encoding; | ||
|
||
using (StreamReader reader = GetFileStreamReader(uri, out encoding)) | ||
using (StreamReader reader = GetFileStreamReader(uri, out Encoding encoding)) | ||
{ | ||
if (reader != null) | ||
{ | ||
|
@@ -600,13 +604,15 @@ private Stream GetContentStream(Uri uri, out Encoding encoding) | |
Stream stream = null; | ||
encoding = null; | ||
string failureReason = null; | ||
IList<FileData> files = _currentV2Run.Files; | ||
|
||
if (uri != null && _currentV2Run.Files != null) | ||
if (uri != null && files != null && _fileKeyToIndexDictionary != null) | ||
{ | ||
#if FILES_ARRAY_WORKS | ||
FileData fileData; | ||
if (_currentV2Run.Files.TryGetValue(uri.OriginalString, out fileData)) | ||
if (_fileKeyToIndexDictionary.TryGetValue(uri.OriginalString, out int index) | ||
&& index < files.Count) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
when would this index < files.Count not be true? that seems like an adverse conition #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right: it's guaranteed by the way the dictionary is constructed. Removed this condition. In reply to: 247376623 [](ancestors = 247376623) |
||
{ | ||
FileData fileData = files[index]; | ||
|
||
// We need the encoding because the content might have been transcoded to UTF-8 | ||
string encodingName = fileData.Encoding ?? _currentV2Run.DefaultFileEncoding; | ||
encoding = GetFileEncoding(encodingName); | ||
|
@@ -654,7 +660,6 @@ private Stream GetContentStream(Uri uri, out Encoding encoding) | |
failureReason = $"Encoding for file '{uri.OriginalString}' could not be determined"; | ||
} | ||
} | ||
#endif | ||
} | ||
|
||
if (stream == null && failureReason == null) | ||
|
@@ -731,16 +736,11 @@ internal Dictionary<string, string> CreateResponseFilesDictionary(IList<FileLoca | |
foreach (FileLocation fileLocation in v2ResponseFilesList) | ||
{ | ||
string key = fileLocation.Uri.OriginalString; | ||
string fileContent = null; | ||
#if FILES_ARRAY_WORKS | ||
FileData responseFile; | ||
if (_currentV2Run.Files != null && _currentV2Run.Files.TryGetValue(key, out responseFile)) | ||
if (_fileKeyToIndexDictionary != null && _fileKeyToIndexDictionary.TryGetValue(key, out int responseFileIndex)) | ||
{ | ||
fileContent = responseFile.Contents?.Text; | ||
FileData responseFile = _currentV2Run.Files[responseFileIndex]; | ||
responseFiles.Add(key, responseFile.Contents?.Text); | ||
} | ||
#endif | ||
|
||
responseFiles.Add(key, fileContent); | ||
} | ||
} | ||
|
||
|
@@ -825,8 +825,7 @@ internal RuleVersionOne CreateRule(IRule v2IRule) | |
{ | ||
RuleVersionOne rule = null; | ||
|
||
Rule v2Rule = v2IRule as Rule; | ||
var properties = v2Rule != null ? v2Rule.Properties : null; | ||
var properties = v2IRule is Rule v2Rule ? v2Rule.Properties : null; | ||
|
||
if (v2IRule != null) | ||
{ | ||
|
@@ -872,10 +871,10 @@ internal RunVersionOne CreateRun(Run v2Run) | |
run = new RunVersionOne(); | ||
_currentRun = run; | ||
|
||
CreateFileKeyIndexMappings(v2Run.Files, out _fileKeyToIndexDictionary, out _fileIndexToKeyDictionary); | ||
|
||
run.BaselineId = v2Run.BaselineInstanceGuid; | ||
#if FILES_ARRAY_WORKS | ||
run.Files = v2Run.Files?.ToDictionary(v => v.Key, v => CreateFileData(v.Value)); | ||
#endif | ||
run.Files = CreateV1FileDictionary(); | ||
run.Id = v2Run.Id?.InstanceGuid; | ||
run.AutomationId = v2Run.AggregateIds?.FirstOrDefault()?.InstanceId; | ||
|
||
|
@@ -904,6 +903,105 @@ internal RunVersionOne CreateRun(Run v2Run) | |
return run; | ||
} | ||
|
||
// In SARIF v1, run.files was a dictionary, whereas in v2 it is an array. | ||
// During the v2-to-v1 conversion, it is sometimes necessary to look up | ||
// information from a v2 FileData object. Rather than having to search the | ||
// v2 run.files array to find the required FileData object, we construct a | ||
// dictionary from the array so we can access the required FileData object | ||
// directly. It turns out that we need mappings in both directions: from | ||
// array index to dictionary key, and vice versa. | ||
private static void CreateFileKeyIndexMappings( | ||
IList<FileData> v2Files, | ||
out IDictionary<string, int> fileKeyToIndexDictionary, | ||
out IDictionary<int, string> fileIndexToKeyDictionary) | ||
{ | ||
fileKeyToIndexDictionary = null; | ||
fileIndexToKeyDictionary = null; | ||
|
||
if (v2Files != null) | ||
{ | ||
fileKeyToIndexDictionary = new Dictionary<string, int>(); | ||
fileIndexToKeyDictionary = new Dictionary<int, string>(); | ||
|
||
for (int index = 0; index < v2Files.Count; ++index) | ||
{ | ||
FileData v2File = v2Files[index]; | ||
string key = CreateFileDictionaryKey(v2File, v2Files); | ||
|
||
fileKeyToIndexDictionary[key] = index; | ||
fileIndexToKeyDictionary[index] = key; | ||
} | ||
} | ||
} | ||
|
||
// Given a v2 FileData object, synthesize a key that will be used to locate that object | ||
// in the v2 file data dictionary. We will use the same key to locate the corresponding | ||
// v1 FileData object when we create the v1 run.files dictionary. | ||
// | ||
// This method is necessary because, although ideally we would use the FileData | ||
// object's URI as the dictionary key, it is possible for two FileData objects to | ||
// have the same URI. For example: | ||
// | ||
// 1. Nested files with the same name might exist in two different containers. | ||
// 2. There might be two files with the same URI but difference uriBaseIds. | ||
// | ||
// To deal with Case #1, we synthesize a key as follows: | ||
// | ||
// <root file URI>#<nested file path>/<level 2 nested file path>/... | ||
// | ||
// To deal with Case #2, we would follow the chain or uriBaseIds, prepending | ||
// the value of each one to the URI. | ||
// | ||
// WE DO NOT YET HANDLE CASE #2. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the missing code and test case. #Pending There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will handle this in a separate PR, after lunch, after I review your PR. In reply to: 247375067 [](ancestors = 247375067) |
||
private static string CreateFileDictionaryKey(FileData v2File, IList<FileData> v2Files) | ||
{ | ||
var sb = new StringBuilder(v2File.FileLocation.Uri.OriginalString); | ||
while (v2File.ParentIndex != -1) | ||
{ | ||
FileData parentFile = v2Files[v2File.ParentIndex]; | ||
|
||
// The convention for building the key is as follows: | ||
// The root file URI is separated from the chain of nested files by '#'; | ||
// The nested file URIs are separated from each other by '/'. | ||
string separator = parentFile.ParentIndex == -1 ? "#" : "/"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
are you sure this code doesn't double slashes at the point where we jump from a nested archive to an element in it? i.e., need a test case for myarchive.zip#myarchive2.zip/foo.cpp in this three-part file object, the most nested item will have a URL of /foo.cpp does your code double that slash? like so myarchive.zip#myarchive2.zip//foo.cpp? #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch! I forgot that nested file URIs already start with a slash. I'll add the test case you asked for to expose this bug, and then fix it. In reply to: 247376946 [](ancestors = 247376946) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
sb.Insert(0, separator); | ||
sb.Insert(0, parentFile.FileLocation.Uri.OriginalString); | ||
|
||
v2File = parentFile; | ||
} | ||
|
||
return sb.ToString(); | ||
} | ||
|
||
private IDictionary<string, FileDataVersionOne> CreateV1FileDictionary() | ||
{ | ||
Dictionary<string, FileDataVersionOne> filesVersionOne = null; | ||
|
||
if (_fileKeyToIndexDictionary != null) | ||
{ | ||
filesVersionOne = new Dictionary<string, FileDataVersionOne>(); | ||
foreach (KeyValuePair<string, int> entry in _fileKeyToIndexDictionary) | ||
{ | ||
string key = entry.Key; | ||
int index = entry.Value; | ||
FileData v2File = _currentV2Run.Files[index]; | ||
FileDataVersionOne fileDataVersionOne = CreateFileData(v2File); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Again prefer to version these things, e.g., CreateFileDataVersionOne #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I agree: I always found this code confusing for that reason, and all the In reply to: 247376992 [](ancestors = 247376992) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
// There's no need to repeat the URI in the v1 FileData object | ||
// if it matches the dictionary key. | ||
if (fileDataVersionOne.Uri.OriginalString.Equals(key)) | ||
{ | ||
fileDataVersionOne.Uri = null; | ||
} | ||
|
||
filesVersionOne[key] = fileDataVersionOne; | ||
} | ||
} | ||
|
||
return filesVersionOne; | ||
} | ||
|
||
private IDictionary<string, LogicalLocationVersionOne> CreateLogicalLocationsDictionary(IList<LogicalLocation> logicalLocations) | ||
{ | ||
Dictionary<string, LogicalLocationVersionOne> logicalLocationsVersionOne = null; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The transformer now refrains from emitting
uri
if it matches the key, as permitted by the v1 spec. We could go further and omit the whole object if it's empty but that's more work that I'm willing to do, and would cause problems if the empty object were in a nested file chain. #ByDesign