From 1c742872229ffc8d6a6056dcbcec270cf50bf42e Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Thu, 2 Apr 2020 11:35:24 -0700 Subject: [PATCH] Experimental pgo tool (#34433) * Experimental pgo tool - Experiment and rough design going forward for providing profile data to the compiler * Update build so dotnet-pgo is built --- eng/Subsets.props | 1 + src/coreclr/src/tools/dotnet-pgo/Logger.cs | 34 + .../JitTraceRuntime.cs | 273 +++++ .../src/tools/dotnet-pgo/ModuleLoadLogger.cs | 61 + .../src/tools/dotnet-pgo/PgoTraceProcess.cs | 105 ++ src/coreclr/src/tools/dotnet-pgo/Program.cs | 1019 +++++++++++++++++ src/coreclr/src/tools/dotnet-pgo/README.md | 50 + .../TraceRuntimeDescToTypeSystemDesc.cs | 514 +++++++++ .../dotnet-pgo/TraceTypeSystemContext.cs | 363 ++++++ .../dotnet-pgo/TypeSystemMetadataEmitter.cs | 375 ++++++ .../tools/dotnet-pgo/dotnet-pgo-experiment.md | 238 ++++ .../src/tools/dotnet-pgo/dotnet-pgo.csproj | 22 + 12 files changed, 3055 insertions(+) create mode 100644 src/coreclr/src/tools/dotnet-pgo/Logger.cs create mode 100644 src/coreclr/src/tools/dotnet-pgo/Microsoft.Diagnostics.JitTrace/JitTraceRuntime.cs create mode 100644 src/coreclr/src/tools/dotnet-pgo/ModuleLoadLogger.cs create mode 100644 src/coreclr/src/tools/dotnet-pgo/PgoTraceProcess.cs create mode 100644 src/coreclr/src/tools/dotnet-pgo/Program.cs create mode 100644 src/coreclr/src/tools/dotnet-pgo/README.md create mode 100644 src/coreclr/src/tools/dotnet-pgo/TraceRuntimeDescToTypeSystemDesc.cs create mode 100644 src/coreclr/src/tools/dotnet-pgo/TraceTypeSystemContext.cs create mode 100644 src/coreclr/src/tools/dotnet-pgo/TypeSystemMetadataEmitter.cs create mode 100644 src/coreclr/src/tools/dotnet-pgo/dotnet-pgo-experiment.md create mode 100644 src/coreclr/src/tools/dotnet-pgo/dotnet-pgo.csproj diff --git a/eng/Subsets.props b/eng/Subsets.props index af31b4c424ed4..aa7a89b22fcf3 100644 --- a/eng/Subsets.props +++ b/eng/Subsets.props @@ -185,6 +185,7 @@ + diff --git 
a/src/coreclr/src/tools/dotnet-pgo/Logger.cs b/src/coreclr/src/tools/dotnet-pgo/Logger.cs new file mode 100644 index 0000000000000..814eea144d26e --- /dev/null +++ b/src/coreclr/src/tools/dotnet-pgo/Logger.cs @@ -0,0 +1,34 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Text; + +namespace Microsoft.Diagnostics.Tools.Pgo +{ + // Console logger used by the tool: warnings in yellow on stdout, errors in red on stderr, plain messages on stdout. + class Logger + { + // Writes "Warning: " followed by the text to standard output in yellow. + public void PrintWarning(string warning) + { + WriteLineInColor(Console.Out, ConsoleColor.Yellow, "Warning: " + warning); + } + + // Writes "Error: " followed by the text to standard error in red. + public void PrintError(string error) + { + WriteLineInColor(Console.Error, ConsoleColor.Red, "Error: " + error); + } + + // Writes the message to standard output without touching the console color. + public void PrintMessage(string message) + { + Console.WriteLine(message); + } + + // Remembers the current foreground color, writes one line in the requested color, then restores the old color. + private static void WriteLineInColor(System.IO.TextWriter writer, ConsoleColor color, string line) + { + ConsoleColor previousColor = Console.ForegroundColor; + Console.ForegroundColor = color; + writer.WriteLine(line); + Console.ForegroundColor = previousColor; + } + } +} diff --git a/src/coreclr/src/tools/dotnet-pgo/Microsoft.Diagnostics.JitTrace/JitTraceRuntime.cs b/src/coreclr/src/tools/dotnet-pgo/Microsoft.Diagnostics.JitTrace/JitTraceRuntime.cs new file mode 100644 index 0000000000000..a3898438e7807 --- /dev/null +++ b/src/coreclr/src/tools/dotnet-pgo/Microsoft.Diagnostics.JitTrace/JitTraceRuntime.cs @@ -0,0 +1,273 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Generic; +using System.IO; +using System.Reflection; +using System.Runtime.CompilerServices; + +namespace Microsoft.Diagnostics.JitTrace +{ + public static class JitTraceRuntime + { + /// + /// When a jittrace entry caused a failure, it will call this event with the + /// line in the jittrace file that triggered the failure. "" will be passed for stream reading failures. + /// + public static event Action LogFailure; + + // Raises LogFailure if anyone is subscribed; the handler is copied to a local before the null check. + private static void LogOnFailure(string failure) + { + var log = LogFailure; + if (log != null) + { + log(failure); + } + } + + /// + /// Prepare all the methods specified in a .jittrace file for execution + /// + /// Filename of .jittrace file + /// count of successful prepare operations. May exceed the count of lines in the jittrace file due to fuzzy matching + /// count of failed prepare operations. May exceed the count of lines in the jittrace file due to fuzzy matching + public static void Prepare(FileInfo fileName, out int successfulPrepares, out int failedPrepares) + { + using (StreamReader sr = new StreamReader(fileName.FullName)) + { + Prepare(sr, out successfulPrepares, out failedPrepares); + } + } + + // Decodes one field: every "\s" becomes the separator, then every "\\" becomes a single backslash. + private static string UnescapeStr(string input, string separator) + { + return input.Replace("\\s", separator).Replace("\\\\", "\\"); + } + + // Splits the input on the separator characters and unescapes each resulting field. + private static string[] SplitAndUnescape(string input, string separator, char[] separatorCharArray) + { + string[] returnValue = input.Split(separatorCharArray); + for (int i = 0; i < returnValue.Length; i++) + { + returnValue[i] = UnescapeStr(returnValue[i], separator); + } + return returnValue; + } + + /// + /// Prepare all the methods specified in a string that matches the .jittrace file format + /// for execution. Useful for embedding data via a resource. + /// + /// string with .jittrace data + /// count of successful prepare operations. May exceed the count of lines in the jittrace file due to fuzzy matching + /// count of failed prepare operations. 
May exceed the count of lines in the jittrace file due to fuzzy matching + public static void Prepare(string jittraceString, out int successfulPrepares, out int failedPrepares) + { + MemoryStream strStream = new MemoryStream(); + using (var writer = new StreamWriter(strStream, encoding: null, bufferSize: -1, leaveOpen: true)) + { + writer.Write(jittraceString); + } + + strStream.Position = 0; + + // Dispose the reader (and with it the backing MemoryStream) once parsing has finished. + using (var reader = new StreamReader(strStream)) + { + Prepare(reader, out successfulPrepares, out failedPrepares); + } + } + + /// + /// Prepare all the methods specified in a Stream that matches the .jittrace file format + /// for execution. Handles general purpose stream data. + /// + /// Stream with .jittrace data + /// count of successful prepare operations. May exceed the count of lines in the jittrace file due to fuzzy matching + /// count of failed prepare operations. May exceed the count of lines in the jittrace file due to fuzzy matching + public static void Prepare(StreamReader jittraceStream, out int successfulPrepares, out int failedPrepares) + { + const string outerCsvEscapeChar = "~"; + const string innerCsvEscapeChar = ":"; + char[] outerCsvEscapeCharArray = new char[] { '~' }; + char[] innerCsvEscapeCharArray = new char[] { ':' }; + successfulPrepares = 0; + failedPrepares = 0; + + while (true) + { + string methodString = string.Empty; + try + { + methodString = jittraceStream.ReadLine(); + if (methodString == null) + { + break; + } + // A blank line ends processing; any lines after it are not read. + if (methodString.Trim() == string.Empty) + { + break; + } + + string[] methodStrComponents = SplitAndUnescape(methodString, outerCsvEscapeChar, outerCsvEscapeCharArray); + + Type owningType = Type.GetType(methodStrComponents[1], false); + + // owningType failed to load above. 
Skip rest of method discovery + if (owningType == null) + { + failedPrepares++; + LogOnFailure(methodString); + continue; + } + + int signatureLen = int.Parse(methodStrComponents[2]); + string[] methodInstantiationArgComponents = SplitAndUnescape(methodStrComponents[3], innerCsvEscapeChar, innerCsvEscapeCharArray); + int genericMethodArgCount = int.Parse(methodInstantiationArgComponents[0]); + Type[] methodArgs = genericMethodArgCount != 0 ? new Type[genericMethodArgCount] : Array.Empty(); + bool abortMethodDiscovery = false; + for (int iMethodArg = 0; iMethodArg < genericMethodArgCount; iMethodArg++) + { + Type methodArg = Type.GetType(methodInstantiationArgComponents[1 + iMethodArg], false); + methodArgs[iMethodArg] = methodArg; + + // methodArg failed to load above. Skip rest of method discovery + if (methodArg == null) + { + abortMethodDiscovery = true; + break; + } + } + + if (abortMethodDiscovery) + { + failedPrepares++; + LogOnFailure(methodString); + continue; + } + + string methodName = methodStrComponents[4]; + + // Now all data is parsed + // Find method + IEnumerable membersFound; + + BindingFlags bindingFlags = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Static | BindingFlags.DeclaredOnly; + if (methodName == ".ctor") + { + if (genericMethodArgCount != 0) + { + // Ctors with generic args don't make sense + failedPrepares++; + LogOnFailure(methodString); + continue; + } + membersFound = CtorMethodsThatMatch(); + IEnumerable CtorMethodsThatMatch() + { + ConstructorInfo[] constructors = owningType.GetConstructors(bindingFlags); + foreach (ConstructorInfo ci in constructors) + { + ConstructorInfo returnConstructorInfo = null; + + try + { + if (ci.GetParameters().Length == signatureLen) + { + returnConstructorInfo = ci; + } + } + catch + { + } + if (returnConstructorInfo != null) + { + yield return returnConstructorInfo.MethodHandle; + } + } + } + } + else if (methodName == ".cctor") + { + MemberInfo mi = 
owningType.TypeInitializer; + if (mi == null) + { + // This type no longer has a type initializer + failedPrepares++; + LogOnFailure(methodString); + continue; + } + membersFound = new RuntimeMethodHandle[] { owningType.TypeInitializer.MethodHandle }; + } + else + { + membersFound = MethodsThatMatch(); + IEnumerable MethodsThatMatch() + { + MethodInfo[] methods = owningType.GetMethods(bindingFlags); + foreach (MethodInfo mi in methods) + { + MethodInfo returnMethodInfo = null; + try + { + if (mi.Name != methodName) + { + continue; + } + + if (mi.GetParameters().Length != signatureLen) + { + continue; + } + if (mi.GetGenericArguments().Length != genericMethodArgCount) + { + continue; + } + if (genericMethodArgCount != 0) + { + returnMethodInfo = mi.MakeGenericMethod(methodArgs); + } + else + { + returnMethodInfo = mi; + } + } + catch + { + } + + if (returnMethodInfo != null) + { + yield return returnMethodInfo.MethodHandle; + } + } + } + } + + bool foundAtLeastOneEntry = false; + foreach (RuntimeMethodHandle memberHandle in membersFound) + { + foundAtLeastOneEntry = true; + try + { + System.Runtime.CompilerServices.RuntimeHelpers.PrepareMethod(memberHandle); + successfulPrepares++; + } + catch + { + failedPrepares++; + LogOnFailure(methodString); + } + } + if (!foundAtLeastOneEntry) + { + failedPrepares++; + LogOnFailure(methodString); + } + } + catch + { + failedPrepares++; + LogOnFailure(methodString); + } + } + } + } +} \ No newline at end of file diff --git a/src/coreclr/src/tools/dotnet-pgo/ModuleLoadLogger.cs b/src/coreclr/src/tools/dotnet-pgo/ModuleLoadLogger.cs new file mode 100644 index 0000000000000..170fca801f293 --- /dev/null +++ b/src/coreclr/src/tools/dotnet-pgo/ModuleLoadLogger.cs @@ -0,0 +1,61 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Microsoft.Diagnostics.Tools.Pgo +{ + // Reports assembly load results through a Logger; each failing simple name is reported at most once. + class ModuleLoadLogger + { + Logger _logger; + HashSet _simpleNamesReported = new HashSet(); + + public ModuleLoadLogger(Logger logger) + { + _logger = logger; + } + + // Reports a load failure that includes the path that was tried. + public void LogModuleLoadFailure(string simpleName, string filePath) + { + if (!_simpleNamesReported.Add(simpleName)) + return; + + ReportFailure(simpleName, $"Failed to load assembly '{simpleName}' from '{filePath}'"); + } + + // Reports a load failure when no path is available. + public void LogModuleLoadFailure(string simpleName) + { + if (!_simpleNamesReported.Add(simpleName)) + return; + + ReportFailure(simpleName, $"Failed to load assembly '{simpleName}'"); + } + + // Reports a successful load; these are not de-duplicated. + public void LogModuleLoadSuccess(string simpleName, string filePath) + { + _logger.PrintMessage($"Loaded '{simpleName}' from '{filePath}'"); + } + + // A failure for System.Private.CoreLib is printed as an error, any other assembly as a warning. + private void ReportFailure(string simpleName, string text) + { + bool isCoreLib = string.Equals("System.Private.CoreLib", simpleName, StringComparison.OrdinalIgnoreCase); + if (isCoreLib) + { + _logger.PrintError(text); + } + else + { + _logger.PrintWarning(text); + } + } + } +} diff --git a/src/coreclr/src/tools/dotnet-pgo/PgoTraceProcess.cs b/src/coreclr/src/tools/dotnet-pgo/PgoTraceProcess.cs new file mode 100644 index 0000000000000..e096d7355794a --- /dev/null +++ b/src/coreclr/src/tools/dotnet-pgo/PgoTraceProcess.cs @@ -0,0 +1,105 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Generic; +using System.IO; +using Microsoft.Diagnostics.Tracing.Etlx; +using Microsoft.Diagnostics.Tracing.Parsers.Clr; + +namespace Microsoft.Diagnostics.Tools.Pgo +{ + // Pairs a managed module from the trace with the CLR instance id recorded for its assembly. + struct LoadedModule + { + public LoadedModule(int clrInstanceID, TraceManagedModule managedModule) + { + ClrInstanceID = clrInstanceID; + ManagedModule = managedModule; + } + + public readonly int ClrInstanceID; + public readonly TraceManagedModule ManagedModule; + } + + // Wraps a TraceProcess and maps each assembly id seen in its events to a CLR instance id. + class PgoTraceProcess + { + public PgoTraceProcess(TraceProcess traceProcess) + { + TraceProcess = traceProcess; + foreach (var assemblyLoadTrace in traceProcess.EventsInProcess.ByEventType()) + { + _assemblyToCLRInstanceIDMap[assemblyLoadTrace.AssemblyID] = assemblyLoadTrace.ClrInstanceID; + } + } + + private Dictionary _assemblyToCLRInstanceIDMap = new Dictionary(); + + public readonly TraceProcess TraceProcess; + + // Yields every loaded managed module whose assembly id has a known CLR instance id; others are skipped. + public IEnumerable EnumerateLoadedManagedModules() + { + foreach (var moduleFile in TraceProcess.LoadedModules) + { + if (moduleFile is TraceManagedModule managedModule) + { + int clrInstanceIDModule; + if (!_assemblyToCLRInstanceIDMap.TryGetValue(managedModule.AssemblyID, out clrInstanceIDModule)) + continue; + + yield return new LoadedModule(clrInstanceIDModule, managedModule); + } + } + } + + // True when the given CLR instance has loaded a module named System.Private.CoreLib. + public bool ClrInstanceIsCoreCLRInstance(int clrInstanceId) + { + foreach (var module in EnumerateLoadedManagedModules()) + { + if (module.ClrInstanceID != clrInstanceId) + continue; + if (CompareModuleAgainstSimpleName("System.Private.CoreLib", module.ManagedModule)) + { + return true; + } + } + + return false; + } + + // Case-insensitive match of the module file name against simpleName or simpleName + ".il"; false when there is no module file. + public static bool CompareModuleAgainstSimpleName(string simpleName, TraceManagedModule managedModule) + { + if (managedModule.ModuleFile != null) + { + if ((String.Compare(managedModule.ModuleFile.Name, simpleName, StringComparison.OrdinalIgnoreCase) == 0) || (String.Compare(managedModule.ModuleFile.Name, (simpleName + 
".il"), StringComparison.OrdinalIgnoreCase) == 0)) + { + return true; + } + } + return false; + } + + // Returns the file path recorded for the module, or "" when the trace has no module file for it. + // If that path does not exist on disk and ends in ".il.dll" or ".il.exe", the ".il" part is dropped. + public static string ComputeFilePathOnDiskForModule(TraceManagedModule managedModule) + { + string filePath = ""; + if (managedModule.ModuleFile != null) + { + filePath = managedModule.ModuleFile.FilePath; + string ildllstr = ".il.dll"; + string ilexestr = ".il.exe"; + + // Probe the disk once; suffix checks are ordinal because file extensions are not linguistic text. + if (!File.Exists(filePath)) + { + if (filePath.EndsWith(ildllstr, StringComparison.Ordinal)) + { + filePath = filePath.Substring(0, filePath.Length - ildllstr.Length) + ".dll"; + } + else if (filePath.EndsWith(ilexestr, StringComparison.Ordinal)) + { + filePath = filePath.Substring(0, filePath.Length - ilexestr.Length) + ".exe"; + } + } + } + + return filePath; + } + } +} diff --git a/src/coreclr/src/tools/dotnet-pgo/Program.cs b/src/coreclr/src/tools/dotnet-pgo/Program.cs new file mode 100644 index 0000000000000..942681e72e459 --- /dev/null +++ b/src/coreclr/src/tools/dotnet-pgo/Program.cs @@ -0,0 +1,1019 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using Internal.TypeSystem; +using Internal.TypeSystem.Ecma; +using Internal.IL; +using Microsoft.Diagnostics.Tracing; +using Microsoft.Diagnostics.Tracing.Etlx; +using Microsoft.Diagnostics.Tracing.Parsers.Clr; +using System; +using System.Collections.Generic; +using System.IO; +using System.Reflection; +using System.Text; +using System.Linq; +using System.Diagnostics; +using System.CommandLine; +using System.CommandLine.Invocation; +using System.Globalization; +using System.Threading.Tasks; +using System.Reflection.Metadata; +using System.Reflection.Metadata.Ecma335; +using System.IO.Compression; + +namespace Microsoft.Diagnostics.Tools.Pgo +{ + public enum PgoFileType + { + jittrace = 1, + mibc = 2, + } + + [Flags] + public enum jittraceoptions + { + none = 0, + sorted = 1, + showtimestamp = 2, + } + + class Program + { + static bool s_reachedInnerMain; + static Logger s_logger = new Logger(); + static int Main(string []args) + { + var rootCommand = new RootCommand(@"dotnet-pgo - A tool for generating jittrace files so that a process can gain profile guided benefits. It relies on tracefiles as might be generated from perfview collect or dotnet trace.") + { + new Option("--trace-file") + { + Description = "Specify the trace file to be parsed", + Argument = new Argument() + { + Arity = ArgumentArity.ExactlyOne + } + }, + new Option("--output-file-name") + { + Description = "Specify the jittrace filename to be created", + Argument = new Argument() + { + Arity = ArgumentArity.ZeroOrOne + } + }, + new Option("--pid") + { + Description = "The pid within the trace of the process to examine. If this is a multi-process trace, at least one of --pid or --process-name must be specified", + Argument = new Argument() + { + Arity = ArgumentArity.ZeroOrOne + } + }, + new Option("--pgo-file-type") + { + Description = "The type of pgo file to generate. A valid value must be specified if --output-file-name is specified. 
Valid values are jittrace and mibc", + Argument = new Argument() + { + Arity = ArgumentArity.ExactlyOne + } + }, + new Option("--process-name") + { + Description = "The process name within the trace of the process to examine. If this is a multi-process trace, at least one of --pid or --process-name must be specified", + Argument = new Argument() + { + Arity = ArgumentArity.ZeroOrOne + } + }, + new Option("--reference") + { + Description = "If a reference is not located on disk at the same location as used in the process, it may be specified with a --reference parameter", + Argument = new Argument>() + { + Arity = ArgumentArity.ZeroOrMore + } + }, + new Option("--clr-instance-id") + { + Description = "If the process contains multiple .NET runtimes, the instance ID must be specified", + Argument = new Argument() + { + Arity = ArgumentArity.ZeroOrOne + } + }, + new Option("--process-jit-events") + { + Description = "Process JIT events. Defaults to true", + Argument = new Argument() + }, + new Option("--process-r2r-events") + { + Description = "Process R2R events. Defaults to true", + Argument = new Argument() + }, + // Help text matches InnerMain, where displayProcessedEvents defaults to false and controls printing of the processed events. + new Option("--display-processed-events") + { + Description = "Display the processed events. Defaults to false", + Argument = new Argument() + }, + new Option("--warnings") + { + Description = "Display warnings for methods which could not be processed. Defaults to true", + Argument = new Argument() + }, + new Option("--verbose-warnings") + { + Description = "Display information about why jit events may be not processed. Defaults to false", + Argument = new Argument() + }, + new Option("--validate-output-file") + { + Description = "Validate output file. Defaults to true. 
Not all output formats support validation", + Argument = new Argument() + }, + new Option("--jittrace-options") + { + Description = "Jit Trace emit options (defaults to sorted) Valid options are 'none', 'sorted', 'showtimestamp', 'sorted,showtimestamp'", + Argument = new Argument() + }, + new Option("--exclude-events-before") + { + Description = "Exclude data from events before specified time", + Argument = new Argument() + }, + new Option("--exclude-events-after") + { + Description = "Exclude data from events after specified time", + Argument = new Argument() + } + }; + + bool oldReachedInnerMain = s_reachedInnerMain; + try + { + s_reachedInnerMain = false; + rootCommand.Handler = CommandHandler.Create(new Func, int?, bool, bool, bool, bool, bool, jittraceoptions, double, double, bool, int>(InnerMain)); + Task command = rootCommand.InvokeAsync(args); + + command.Wait(); + int result = command.Result; + if (!s_reachedInnerMain) + { + // Print example tracing commands here, as the autogenerated help logic doesn't allow customizing help with newlines and such + Console.WriteLine(@" +Example tracing commands used to generate the input to this tool: +""dotnet trace collect -p 73060 --providers Microsoft-Windows-DotNETRuntime:0x6000080018:5"" + - Capture events from process 73060 where we capture both JIT and R2R events using EventPipe tracing + +""dotnet trace collect -p 73060 --providers Microsoft-Windows-DotNETRuntime:0x4000080018:5"" + - Capture events from process 73060 where we capture only JIT events using EventPipe tracing + +""perfview collect -LogFile:logOfCollection.txt -DataFile:jittrace.etl -Zip:false -merge:false -providers:Microsoft-Windows-DotNETRuntime:0x6000080018:5"" + - Capture Jit and R2R events via perfview of all processes running using ETW tracing +"); + } + return result; + } + finally + { + s_reachedInnerMain = oldReachedInnerMain; + } + } + + static void PrintUsage(string argValidationIssue) + { + if (argValidationIssue != null) + { + 
ConsoleColor oldColor = Console.ForegroundColor; + Console.ForegroundColor = ConsoleColor.Red; + Console.Error.WriteLine(argValidationIssue); + Console.ForegroundColor = oldColor; + } + Main(new string[] { "-h" }); + } + + // Forwards a warning to the shared logger. + static void PrintWarning(string warning) + { + s_logger.PrintWarning(warning); + } + + // Forwards an error to the shared logger's error channel (previously this was sent to PrintWarning by mistake). + static void PrintError(string error) + { + s_logger.PrintError(error); + } + + // One method observed in the trace: when it was seen, which method, and why it was recorded. + struct ProcessedMethodData + { + public ProcessedMethodData(double millisecond, MethodDesc method, string reason) + { + Millisecond = millisecond; + Method = method; + Reason = reason; + } + + public readonly double Millisecond; + public readonly MethodDesc Method; + public readonly string Reason; + } + + static int InnerMain(FileInfo traceFile, + FileInfo outputFileName, + int? pid, + string processName, + PgoFileType? pgoFileType, + IEnumerable reference, + int? clrInstanceId = null, + bool processJitEvents = true, + bool processR2REvents = true, + bool displayProcessedEvents = false, + bool validateOutputFile = true, + bool verboseWarnings = false, + jittraceoptions jitTraceOptions = jittraceoptions.sorted, + double excludeEventsBefore = 0, + double excludeEventsAfter = Double.MaxValue, + bool warnings = true) + { + s_reachedInnerMain = true; + + if (traceFile == null) + { + PrintUsage("--trace-file must be specified"); + return -8; + } + + if (outputFileName != null) + { + if (!pgoFileType.HasValue) + { + PrintUsage($"--pgo-file-type must be specified"); + return -9; + } + if ((pgoFileType.Value != PgoFileType.jittrace) && (pgoFileType != PgoFileType.mibc)) + { + PrintUsage($"Invalid output pgo type {pgoFileType} specified."); + return -9; + } + if (pgoFileType == PgoFileType.jittrace) + { + if (!outputFileName.Name.EndsWith(".jittrace")) + { + PrintUsage($"jittrace output file name must end with .jittrace"); + return -9; + } + } + if (pgoFileType == PgoFileType.mibc) + { + if (!outputFileName.Name.EndsWith(".mibc")) + { + PrintUsage($"mibc output file name must end with 
.mibc"); + return -9; + } + } + } + + string etlFileName = traceFile.FullName; + foreach (string nettraceExtension in new string[] { ".netperf", ".netperf.zip", ".nettrace" }) + { + if (traceFile.FullName.EndsWith(nettraceExtension)) + { + etlFileName = traceFile.FullName.Substring(0, traceFile.FullName.Length - nettraceExtension.Length) + ".etlx"; + Console.WriteLine($"Creating ETLX file {etlFileName} from {traceFile.FullName}"); + TraceLog.CreateFromEventPipeDataFile(traceFile.FullName, etlFileName); + } + } + + string lttngExtension = ".trace.zip"; + if (traceFile.FullName.EndsWith(lttngExtension)) + { + etlFileName = traceFile.FullName.Substring(0, traceFile.FullName.Length - lttngExtension.Length) + ".etlx"; + Console.WriteLine($"Creating ETLX file {etlFileName} from {traceFile.FullName}"); + TraceLog.CreateFromLttngTextDataFile(traceFile.FullName, etlFileName); + } + + using (var traceLog = TraceLog.OpenOrConvert(etlFileName)) + { + if ((!pid.HasValue && processName == null) && traceLog.Processes.Count != 1) + { + Console.WriteLine("Either a pid or process name from the following list must be specified"); + foreach (TraceProcess proc in traceLog.Processes) + { + Console.WriteLine($"Procname = {proc.Name} Pid = {proc.ProcessID}"); + } + return 0; + } + + if (pid.HasValue && (processName != null)) + { + PrintError("--pid and --process-name cannot be specified together"); + return -1; + } + + // For a particular process + TraceProcess p; + if (pid.HasValue) + { + p = traceLog.Processes.LastProcessWithID(pid.Value); + + // NOTE(review): the lookup is expected to yield null for a pid that is not in the trace; report that instead of failing later on p.EventsInProcess. + if (p == null) + { + PrintError($"Unable to find process with pid {pid.Value} in trace"); + return -1; + } + } + else if (processName != null) + { + List matchingProcesses = new List(); + foreach (TraceProcess proc in traceLog.Processes) + { + if (String.Compare(proc.Name, processName, StringComparison.OrdinalIgnoreCase) == 0) + { + matchingProcesses.Add(proc); + } + } + + if (matchingProcesses.Count == 0) + { + PrintError("Unable to find matching process in trace"); + return -1; + } + if (matchingProcesses.Count > 1) + { + StringBuilder errorMessage = new 
StringBuilder(); + + errorMessage.AppendLine("Found multiple matching processes in trace"); + foreach (TraceProcess proc in matchingProcesses) + { + errorMessage.AppendLine($"{proc.Name}\tpid={proc.ProcessID}\tCPUMSec={proc.CPUMSec}"); + } + PrintError(errorMessage.ToString()); + return -2; + } + p = matchingProcesses[0]; + } + else + { + p = traceLog.Processes.First(); + } + + if (!p.EventsInProcess.ByEventType().Any()) + { + PrintError($"No MethodDetails\nWas the trace collected with provider at least \"Microsoft-Windows-DotNETRuntime:0x4000080018:5\"?"); + return -3; + } + + if (!p.EventsInProcess.ByEventType().Any()) + { + PrintError($"No BulkType data\nWas the trace collected with provider at least \"Microsoft-Windows-DotNETRuntime:0x4000080018:5\"?"); + return -4; + } + + if (!p.EventsInProcess.ByEventType().Any()) + { + PrintError($"No managed module load data\nWas the trace collected with provider at least \"Microsoft-Windows-DotNETRuntime:0x4000080018:5\"?"); + return -5; + } + + if (!p.EventsInProcess.ByEventType().Any()) + { + PrintError($"No managed jit starting data\nWas the trace collected with provider at least \"Microsoft-Windows-DotNETRuntime:0x4000080018:5\"?"); + return -5; + } + + if (processR2REvents) + { + if (!p.EventsInProcess.ByEventType().Any()) + { + PrintError($"No r2r entrypoint data. 
This is not an error as in this case we can examine the jitted methods only\nWas the trace collected with provider at least \"Microsoft-Windows-DotNETRuntime:0x6000080018:5\"?"); + } + } + + PgoTraceProcess pgoProcess = new PgoTraceProcess(p); + + if (!clrInstanceId.HasValue) + { + HashSet clrInstanceIds = new HashSet(); + HashSet examinedClrInstanceIds = new HashSet(); + foreach (var assemblyLoadTrace in p.EventsInProcess.ByEventType()) + { + if (examinedClrInstanceIds.Add(assemblyLoadTrace.ClrInstanceID)) + { + if (pgoProcess.ClrInstanceIsCoreCLRInstance(assemblyLoadTrace.ClrInstanceID)) + clrInstanceIds.Add(assemblyLoadTrace.ClrInstanceID); + } + } + + if (clrInstanceIds.Count != 1) + { + if (clrInstanceIds.Count == 0) + { + PrintError($"No managed CLR in target process, or per module information could not be loaded from the trace."); + } + else + { + // There are multiple clr processes... search for the one that implements + int[] clrInstanceIdsArray = clrInstanceIds.ToArray(); + Array.Sort(clrInstanceIdsArray); + StringBuilder errorMessage = new StringBuilder(); + errorMessage.AppendLine("Multiple CLR instances used in process. 
Choose one to examine with --clr-instance-id: Valid ids:"); + // List the ids from the sorted array so the output order is stable. + foreach (int instanceID in clrInstanceIdsArray) + { + errorMessage.AppendLine(instanceID.ToString()); + } + PrintError(errorMessage.ToString()); + } + return -10; + } + else + { + clrInstanceId = clrInstanceIds.First(); + } + } + + var tsc = new TraceTypeSystemContext(pgoProcess, clrInstanceId.Value, s_logger); + + if (verboseWarnings) + Console.WriteLine($"{traceLog.EventsLost} Lost events"); + + bool filePathError = false; + if (reference != null) + { + foreach (FileInfo fileReference in reference) + { + if (!File.Exists(fileReference.FullName)) + { + PrintError($"Unable to find reference '{fileReference.FullName}'"); + filePathError = true; + } + else + tsc.GetModuleFromPath(fileReference.FullName); + } + } + + if (filePathError) + return -6; + + if (!tsc.Initialize()) + return -12; + + TraceRuntimeDescToTypeSystemDesc idParser = new TraceRuntimeDescToTypeSystemDesc(p, tsc, clrInstanceId.Value); + + SortedDictionary methodsToAttemptToPrepare = new SortedDictionary(); + + if (processR2REvents) + { + foreach (var e in p.EventsInProcess.ByEventType()) + { + int parenIndex = e.MethodSignature.IndexOf('('); + string retArg = e.MethodSignature.Substring(0, parenIndex); + string paramsArgs = e.MethodSignature.Substring(parenIndex); + string methodNameFromEventDirectly = retArg + e.MethodNamespace + "." 
+ e.MethodName + paramsArgs; + if (e.ClrInstanceID != clrInstanceId.Value) + { + if (!warnings) + continue; + + PrintWarning($"Skipped R2REntryPoint {methodNameFromEventDirectly} due to ClrInstanceID of {e.ClrInstanceID}"); + continue; + } + MethodDesc method = null; + string extraWarningText = null; + try + { + method = idParser.ResolveMethodID(e.MethodID, verboseWarnings); + } + catch (Exception exception) + { + extraWarningText = exception.ToString(); + } + + if (method == null) + { + if ((e.MethodNamespace == "dynamicClass") || !warnings) + continue; + + PrintWarning($"Unable to parse {methodNameFromEventDirectly} when looking up R2R methods"); + if (extraWarningText != null) + PrintWarning(extraWarningText); + continue; + } + if ((e.TimeStampRelativeMSec >= excludeEventsBefore) && (e.TimeStampRelativeMSec <= excludeEventsAfter)) + methodsToAttemptToPrepare.Add((int)e.EventIndex, new ProcessedMethodData(e.TimeStampRelativeMSec, method, "R2RLoad")); + } + } + + // Find all the jitStart events. + if (processJitEvents) + { + foreach (var e in p.EventsInProcess.ByEventType()) + { + int parenIndex = e.MethodSignature.IndexOf('('); + string retArg = e.MethodSignature.Substring(0, parenIndex); + string paramsArgs = e.MethodSignature.Substring(parenIndex); + string methodNameFromEventDirectly = retArg + e.MethodNamespace + "." 
+ e.MethodName + paramsArgs; + if (e.ClrInstanceID != clrInstanceId.Value) + { + if (!warnings) + continue; + + PrintWarning($"Skipped {methodNameFromEventDirectly} due to ClrInstanceID of {e.ClrInstanceID}"); + continue; + } + + MethodDesc method = null; + string extraWarningText = null; + try + { + method = idParser.ResolveMethodID(e.MethodID, verboseWarnings); + } + catch (Exception exception) + { + extraWarningText = exception.ToString(); + } + + if (method == null) + { + if (!warnings) + continue; + + PrintWarning($"Unable to parse {methodNameFromEventDirectly}"); + if (extraWarningText != null) + PrintWarning(extraWarningText); + continue; + } + + if ((e.TimeStampRelativeMSec >= excludeEventsBefore) && (e.TimeStampRelativeMSec <= excludeEventsAfter)) + methodsToAttemptToPrepare.Add((int)e.EventIndex, new ProcessedMethodData(e.TimeStampRelativeMSec, method, "JitStart")); + } + } + + if (displayProcessedEvents) + { + foreach (var entry in methodsToAttemptToPrepare) + { + MethodDesc method = entry.Value.Method; + string reason = entry.Value.Reason; + Console.WriteLine($"{entry.Value.Millisecond.ToString("F4")} {reason} {method}"); + } + } + + Console.WriteLine($"Done processing input file"); + + if (outputFileName == null) + { + return 0; + } + + // Deduplicate entries + HashSet methodsInListAlready = new HashSet(); + List methodsUsedInProcess = new List(); + foreach (var entry in methodsToAttemptToPrepare) + { + if (methodsInListAlready.Add(entry.Value.Method)) + { + methodsUsedInProcess.Add(entry.Value); + } + } + + if (pgoFileType.Value == PgoFileType.jittrace) + GenerateJittraceFile(outputFileName, methodsUsedInProcess, jitTraceOptions); + else if (pgoFileType.Value == PgoFileType.mibc) + return GenerateMibcFile(tsc, outputFileName, methodsUsedInProcess, validateOutputFile); + } + return 0; + } + + class MIbcGroup + { + private static int s_emitCount = 0; + + public MIbcGroup(string name, TypeSystemMetadataEmitter emitter) + { + _buffer = new BlobBuilder(); 
+ _il = new InstructionEncoder(_buffer); + _name = name; + _emitter = emitter; + } + + private BlobBuilder _buffer; + private InstructionEncoder _il; + private string _name; + TypeSystemMetadataEmitter _emitter; + + public void AddProcessedMethodData(ProcessedMethodData processedMethodData) + { + MethodDesc method = processedMethodData.Method; + string reason = processedMethodData.Reason; + + // Format is + // ldtoken method + // variable amount of extra metadata about the method + // pop + try + { + EntityHandle methodHandle = _emitter.GetMethodRef(method); + _il.OpCode(ILOpCode.Ldtoken); + _il.Token(methodHandle); + _il.OpCode(ILOpCode.Pop); + } + catch (Exception ex) + { + PrintWarning($"Exception {ex} while attempting to generate method lists"); + } + } + + public MethodDefinitionHandle EmitMethod() + { + s_emitCount++; + string basicName = "Assemblies_" + _name; + if (_name.Length > 200) + basicName = basicName.Substring(0, 200); // Cap length of name at 200, which is reasonably small. 
+ + string methodName = basicName + "_" + s_emitCount.ToString(CultureInfo.InvariantCulture); + return _emitter.AddGlobalMethod(methodName, _il, 8); + } + } + + private static void AddAssembliesAssociatedWithType(TypeDesc type, HashSet assemblies, out string definingAssembly) + { + definingAssembly = ((MetadataType)type).Module.Assembly.GetName().Name; + assemblies.Add(definingAssembly); + AddAssembliesAssociatedWithType(type, assemblies); + } + + private static void AddAssembliesAssociatedWithType(TypeDesc type, HashSet assemblies) + { + if (type.IsPrimitive) + return; + + if (type.Context.IsCanonicalDefinitionType(type, CanonicalFormKind.Any)) + return; + + if (type.IsParameterizedType) + { + AddAssembliesAssociatedWithType(type.GetParameterType(), assemblies); + } + else + { + assemblies.Add(((MetadataType)type).Module.Assembly.GetName().Name); + foreach (var instantiationType in type.Instantiation) + { + AddAssembliesAssociatedWithType(instantiationType, assemblies); + } + } + } + + private static void AddAssembliesAssociatedWithMethod(MethodDesc method, HashSet assemblies, out string definingAssembly) + { + AddAssembliesAssociatedWithType(method.OwningType, assemblies, out definingAssembly); + foreach (var instantiationType in method.Instantiation) + { + AddAssembliesAssociatedWithType(instantiationType, assemblies); + } + } + + static int GenerateMibcFile(TraceTypeSystemContext tsc, FileInfo outputFileName, ICollection methodsToAttemptToPlaceIntoProfileData, bool validate) + { + TypeSystemMetadataEmitter emitter = new TypeSystemMetadataEmitter(new AssemblyName(outputFileName.Name), tsc); + + SortedDictionary groups = new SortedDictionary(); + StringBuilder mibcGroupNameBuilder = new StringBuilder(); + HashSet assembliesAssociatedWithMethod = new HashSet(); + + foreach (var entry in methodsToAttemptToPlaceIntoProfileData) + { + MethodDesc method = entry.Method; + assembliesAssociatedWithMethod.Clear(); + AddAssembliesAssociatedWithMethod(method, 
assembliesAssociatedWithMethod, out string definingAssembly); + + string[] assemblyNames = new string[assembliesAssociatedWithMethod.Count]; + int i = 1; + assemblyNames[0] = definingAssembly; + + foreach (string s in assembliesAssociatedWithMethod) + { + if (s.Equals(definingAssembly)) + continue; + assemblyNames[i++] = s; + } + + // Always keep the defining assembly as the first name + Array.Sort(assemblyNames, 1, assemblyNames.Length - 1); + mibcGroupNameBuilder.Clear(); + foreach (string s in assemblyNames) + { + mibcGroupNameBuilder.Append(s); + mibcGroupNameBuilder.Append(';'); + } + + string mibcGroupName = mibcGroupNameBuilder.ToString(); + if (!groups.TryGetValue(mibcGroupName, out MIbcGroup mibcGroup)) + { + mibcGroup = new MIbcGroup(mibcGroupName, emitter); + groups.Add(mibcGroupName, mibcGroup); + } + mibcGroup.AddProcessedMethodData(entry); + } + + var buffer = new BlobBuilder(); + var il = new InstructionEncoder(buffer); + + foreach (var entry in groups) + { + il.LoadString(emitter.GetUserStringHandle(entry.Key)); + il.OpCode(ILOpCode.Ldtoken); + il.Token(entry.Value.EmitMethod()); + il.OpCode(ILOpCode.Pop); + } + + emitter.AddGlobalMethod("AssemblyDictionary", il, 8); + MemoryStream peFile = new MemoryStream(); + emitter.SerializeToStream(peFile); + peFile.Position = 0; + + if (outputFileName.Exists) + { + outputFileName.Delete(); + } + + using (ZipArchive file = ZipFile.Open(outputFileName.FullName, ZipArchiveMode.Create)) + { + var entry = file.CreateEntry(outputFileName.Name + ".dll", CompressionLevel.Optimal); + using (Stream archiveStream = entry.Open()) + { + peFile.CopyTo(archiveStream); + } + } + + Console.WriteLine($"Generated {outputFileName.FullName}"); + if (validate) + return ValidateMIbcData(tsc, outputFileName, peFile.ToArray(), methodsToAttemptToPlaceIntoProfileData); + else + return 0; + } + + struct MIbcData + { + public object MetadataObject; + } + + static int ValidateMIbcData(TraceTypeSystemContext tsc, FileInfo outputFileName, 
byte[] moduleBytes, ICollection methodsToAttemptToPrepare) + { + var mibcLoadedData = ReadMIbcData(tsc, outputFileName, moduleBytes).ToArray(); + Dictionary mibcDict = new Dictionary(); + + foreach (var mibcData in mibcLoadedData) + { + mibcDict.Add((MethodDesc)mibcData.MetadataObject, mibcData); + } + + bool failure = false; + if (methodsToAttemptToPrepare.Count != mibcLoadedData.Length) + { + PrintError($"Not same count of methods {methodsToAttemptToPrepare.Count} != {mibcLoadedData.Length}"); + failure = true; + } + + foreach (var entry in methodsToAttemptToPrepare) + { + MethodDesc method = entry.Method; + if (!mibcDict.ContainsKey(method)) + { + PrintError($"{method} not found in mibcEntryData"); + failure = true; + continue; + } + } + + if (failure) + { + return -1; + } + else + { + Console.WriteLine($"Validated {outputFileName.FullName}"); + return 0; + } + } + + static IEnumerable ReadMIbcGroup(TypeSystemContext tsc, EcmaMethod method) + { + EcmaMethodIL ilBody = EcmaMethodIL.Create((EcmaMethod)method); + byte[] ilBytes = ilBody.GetILBytes(); + int currentOffset = 0; + object metadataObject = null; + while (currentOffset < ilBytes.Length) + { + ILOpcode opcode = (ILOpcode)ilBytes[currentOffset]; + if (opcode == ILOpcode.prefix1) + opcode = 0x100 + (ILOpcode)ilBytes[currentOffset + 1]; + switch (opcode) + { + case ILOpcode.ldtoken: + UInt32 token = (UInt32)(ilBytes[currentOffset + 1] + (ilBytes[currentOffset + 2] << 8) + (ilBytes[currentOffset + 3] << 16) + (ilBytes[currentOffset + 4] << 24)); + metadataObject = ilBody.GetObject((int)token); + break; + case ILOpcode.pop: + MIbcData mibcData = new MIbcData(); + mibcData.MetadataObject = metadataObject; + yield return mibcData; + break; + } + + // This isn't correct if there is a switch opcode, but since we won't do that, its ok + currentOffset += opcode.GetSize(); + } + } + + class CanonModule : ModuleDesc, IAssemblyDesc + { + public CanonModule(TypeSystemContext wrappedContext) : base(wrappedContext, null) + 
{ + } + + public override IEnumerable GetAllTypes() + { + throw new NotImplementedException(); + } + + public override MetadataType GetGlobalModuleType() + { + throw new NotImplementedException(); + } + + public override MetadataType GetType(string nameSpace, string name, bool throwIfNotFound = true) + { + TypeSystemContext context = Context; + + if (context.SupportsCanon && (nameSpace == context.CanonType.Namespace) && (name == context.CanonType.Name)) + return Context.CanonType; + if (context.SupportsUniversalCanon && (nameSpace == context.UniversalCanonType.Namespace) && (name == context.UniversalCanonType.Name)) + return Context.UniversalCanonType; + else + { + if (throwIfNotFound) + { + throw new TypeLoadException($"{nameSpace}.{name}"); + } + return null; + } + } + + public AssemblyName GetName() + { + return new AssemblyName("System.Private.Canon"); + } + } + + class CustomCanonResolver : IModuleResolver + { + CanonModule _canonModule; + AssemblyName _canonModuleName; + IModuleResolver _wrappedResolver; + + public CustomCanonResolver(TypeSystemContext wrappedContext) + { + _canonModule = new CanonModule(wrappedContext); + _canonModuleName = _canonModule.GetName(); + _wrappedResolver = wrappedContext; + } + + ModuleDesc IModuleResolver.ResolveAssembly(AssemblyName name, bool throwIfNotFound) + { + if (name.Name == _canonModuleName.Name) + return _canonModule; + else + return _wrappedResolver.ResolveAssembly(name, throwIfNotFound); + } + + ModuleDesc IModuleResolver.ResolveModule(IAssemblyDesc referencingModule, string fileName, bool throwIfNotFound) + { + return _wrappedResolver.ResolveModule(referencingModule, fileName, throwIfNotFound); + } + } + + static IEnumerable ReadMIbcData(TraceTypeSystemContext tsc, FileInfo outputFileName, byte[] moduleBytes) + { + var peReader = new System.Reflection.PortableExecutable.PEReader(System.Collections.Immutable.ImmutableArray.Create(moduleBytes)); + var module = EcmaModule.Create(tsc, peReader, null, null, new 
CustomCanonResolver(tsc)); + + var loadedMethod = (EcmaMethod)module.GetGlobalModuleType().GetMethod("AssemblyDictionary", null); + EcmaMethodIL ilBody = EcmaMethodIL.Create(loadedMethod); + byte[] ilBytes = ilBody.GetILBytes(); + int currentOffset = 0; + while (currentOffset < ilBytes.Length) + { + ILOpcode opcode = (ILOpcode)ilBytes[currentOffset]; + if (opcode == ILOpcode.prefix1) + opcode = 0x100 + (ILOpcode)ilBytes[currentOffset + 1]; + switch (opcode) + { + case ILOpcode.ldtoken: + UInt32 token = (UInt32)(ilBytes[currentOffset + 1] + (ilBytes[currentOffset + 2] << 8) + (ilBytes[currentOffset + 3] << 16) + (ilBytes[currentOffset + 4] << 24)); + foreach (var data in ReadMIbcGroup(tsc, (EcmaMethod)ilBody.GetObject((int)token))) + yield return data; + break; + case ILOpcode.pop: + break; + } + + // This isn't correct if there is a switch opcode, but since we won't do that, its ok + currentOffset += opcode.GetSize(); + } + GC.KeepAlive(peReader); + } + + static void GenerateJittraceFile(FileInfo outputFileName, IEnumerable methodsToAttemptToPrepare, jittraceoptions jittraceOptions) + { + s_logger.PrintMessage($"JitTrace options {jittraceOptions}"); + + List methodsToPrepare = new List(); + HashSet prepareMethods = new HashSet(); + + Dictionary typeStringCache = new Dictionary(); + StringBuilder methodPrepareInstruction = new StringBuilder(); + + StringBuilder instantiationBuilder = new StringBuilder(); + const string outerCsvEscapeChar = "~"; + const string innerCsvEscapeChar = ":"; + foreach (var entry in methodsToAttemptToPrepare) + { + MethodDesc method = entry.Method; + string reason = entry.Reason; + double time = entry.Millisecond; + + methodPrepareInstruction.Clear(); + instantiationBuilder.Clear(); + // Format is FriendlyNameOfMethod~typeIndex~ArgCount~GenericParameterCount:genericParamsSeperatedByColons~MethodName + // This format is not sufficient to exactly describe methods, so the runtime component may compile similar methods + // In the various 
strings \ is escaped to \\ and in the outer ~ csv the ~ character is escaped to \s. In the inner csv : is escaped to \s + try + { + string timeStampAddon = ""; + if (jittraceOptions.HasFlag(jittraceoptions.showtimestamp)) + timeStampAddon = time.ToString("F4") + "-"; + + methodPrepareInstruction.Append(CsvEscape(timeStampAddon + method.ToString(), outerCsvEscapeChar)); + methodPrepareInstruction.Append(outerCsvEscapeChar); + methodPrepareInstruction.Append(CsvEscape(GetStringForType(method.OwningType, typeStringCache), outerCsvEscapeChar)); + methodPrepareInstruction.Append(outerCsvEscapeChar); + methodPrepareInstruction.Append(method.Signature.Length); + methodPrepareInstruction.Append(outerCsvEscapeChar); + + instantiationBuilder.Append(method.Instantiation.Length); + foreach (TypeDesc methodInstantiationType in method.Instantiation) + { + instantiationBuilder.Append(innerCsvEscapeChar); + instantiationBuilder.Append(CsvEscape(GetStringForType(methodInstantiationType, typeStringCache), innerCsvEscapeChar)); + } + + methodPrepareInstruction.Append(CsvEscape(instantiationBuilder.ToString(), outerCsvEscapeChar)); + methodPrepareInstruction.Append(outerCsvEscapeChar); + methodPrepareInstruction.Append(CsvEscape(method.Name, outerCsvEscapeChar)); + } + catch (Exception ex) + { + PrintWarning($"Exception {ex} while attempting to generate method lists"); + continue; + } + + string prepareInstruction = methodPrepareInstruction.ToString(); + if (!prepareMethods.Contains(prepareInstruction)) + { + prepareMethods.Add(prepareInstruction); + methodsToPrepare.Add(prepareInstruction); + } + } + + if (jittraceOptions.HasFlag(jittraceoptions.sorted)) + { + methodsToPrepare.Sort(); + } + + using (TextWriter tw = new StreamWriter(outputFileName.FullName)) + { + foreach (string methodString in methodsToPrepare) + { + tw.WriteLine(methodString); + } + } + + Console.WriteLine($"Generated {outputFileName.FullName}"); + } + + static string CsvEscape(string input, string separator) + { + 
Debug.Assert(separator.Length == 1); + return input.Replace("\\", "\\\\").Replace(separator, "\\s"); + } + + static string GetStringForType(TypeDesc type, Dictionary typeStringCache) + { + string str; + if (typeStringCache.TryGetValue(type, out str)) + { + return str; + } + + CustomAttributeTypeNameFormatter caFormat = new CustomAttributeTypeNameFormatter(); + str = caFormat.FormatName(type, true); + typeStringCache.Add(type, str); + return str; + } + } +} diff --git a/src/coreclr/src/tools/dotnet-pgo/README.md b/src/coreclr/src/tools/dotnet-pgo/README.md new file mode 100644 index 0000000000000..60b7101ad0dd1 --- /dev/null +++ b/src/coreclr/src/tools/dotnet-pgo/README.md @@ -0,0 +1,50 @@ +dotnet-pgo tool +=========================== + +This directory contains the complete source code for the experimental dotnet-pgo tool and associated libraries. + +This tool is used to produce .jittrace files which are used to time-shift JIT compilation from later in the process to earlier in the process, or to produce .mibc files which are used as input to the crossgen2 tool. + +The general notion is to collect a trace of the application timeframe which is to be optimized using either dotnet-trace or PerfView. Then run the dotnet-pgo tool to post-process the trace data into a profile data file that can be consumed by either the application at runtime (.jittrace file) or the AOT crossgen2 compiler (.mibc file). + +## Building the dotnet-pgo tool +Run dotnet build from the root directory + +## Consuming a .jittrace file +Copy the code in the Microsoft.Diagnostics.JitTrace directory into the application, and then call it at startup as shown below. + +``` +using Microsoft.Diagnostics.JitTrace; +... +static void FunctionCalledAtStartup() +{ + JitTraceRuntime.Prepare(@"Somefile.jittrace"); +} +``` + +## Consuming a .mibc file +Invoke the `crossgen2` tool with the --mibc option, passing the .mibc file created by dotnet-pgo. 
+ +## Example tracing commands used to generate the input to this tool: +Note, this tool requires MethodDetails events which are produced by the .NET 5.0 runtime, or by modifying the .NET Core 3 runtime to produce the event. + +- Capture events from process 73060 where we capture both JIT and R2R events using EventPipe tracing +``` +"dotnet trace collect -p 73060 --providers Microsoft-Windows-DotNETRuntime:0x6000080018:5" +``` + +- Capture events from process 73060 where we capture only JIT events using EventPipe tracing +``` +"dotnet trace collect -p 73060 --providers Microsoft-Windows-DotNETRuntime:0x4000080018:5" +``` + +- Capture JIT and R2R events from all running processes via PerfView using ETW tracing +``` +"perfview collect -LogFile:logOfCollection.txt -DataFile:jittrace.etl -Zip:false -merge:false -providers:Microsoft-Windows-DotNETRuntime:0x6000080018:5" +``` + +## Example commands for using dotnet-pgo +- Given the ETL file jittracewithlog.etl produced by perfview collect, create a matching jittrace file named withlog.jittrace based on the data in the process named jittracetest that ran during collection of the ETW data. While processing, print out all of the events processed, and warnings for methods which could not be processed. +``` +H:\git\jittrace\src\Tools\dotnet-pgo\bin\Debug\netcoreapp3.0\dotnet-pgo.exe --trace-file jittracewithlog.etl --process-name jittracetest --output-file-name withlog.jittrace --pgo-file-type jittrace --display-processed-events true +``` diff --git a/src/coreclr/src/tools/dotnet-pgo/TraceRuntimeDescToTypeSystemDesc.cs b/src/coreclr/src/tools/dotnet-pgo/TraceRuntimeDescToTypeSystemDesc.cs new file mode 100644 index 0000000000000..f259d45bdab37 --- /dev/null +++ b/src/coreclr/src/tools/dotnet-pgo/TraceRuntimeDescToTypeSystemDesc.cs @@ -0,0 +1,514 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information. + +using Microsoft.Diagnostics.Tracing.Etlx; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using Internal.TypeSystem; +using Internal.TypeSystem.Ecma; +using Microsoft.Diagnostics.Tracing.Parsers.Clr; +using System.Reflection.Metadata; +using System.Reflection.Metadata.Ecma335; +using System.Reflection; +using System.IO; +using System.Text; + +namespace Microsoft.Diagnostics.Tools.Pgo +{ + class TraceRuntimeDescToTypeSystemDesc + { + TraceProcess _traceProcess; + TypeSystemContext _context; + int _clrInstanceID; + + struct TraceMethodData + { + public TraceMethodData(long loaderModuleID, long typeID, int methodToken, long[] typeParameters) + { + LoaderModuleID = loaderModuleID; + MethodToken = methodToken; + TypeParameters = typeParameters; + TypeID = typeID; + } + + + public readonly long LoaderModuleID; + public readonly int MethodToken; + public readonly long[] TypeParameters; + public readonly long TypeID; + } + + class MethodDescInfo + { + public MethodDescInfo(long id, TraceMethodData methodIDDetailsTraceData) + { + ID = id; + MethodDetailsTraceData = methodIDDetailsTraceData; + } + + public readonly long ID; + public MethodDesc Method; + public readonly TraceMethodData MethodDetailsTraceData; + } + + struct TraceTypeData + { + public TraceTypeData(long moduleID, int typeNameID, Microsoft.Diagnostics.Tracing.Parsers.Clr.TypeFlags flags, byte corElementType, long[] typeParameters, string name) + { + ModuleID = moduleID; + TypeNameID = typeNameID; + Flags = flags; + CorElementType = corElementType; + TypeParameters = typeParameters; + Name = name; + } + + public readonly long ModuleID; + public readonly int TypeNameID; + public readonly Microsoft.Diagnostics.Tracing.Parsers.Clr.TypeFlags Flags; + public readonly byte CorElementType; + public readonly long[] TypeParameters; + public readonly string Name; + } + + class TypeHandleInfo + { + public TypeHandleInfo(long 
id, TraceTypeData traceData) + { + ID = id; + TypeValue = traceData; + } + + public readonly long ID; + public TypeDesc Type; + public readonly TraceTypeData TypeValue; + } + + class ModuleDescInfo + { + public ModuleDescInfo(long id, TraceManagedModule traceManagedModule) + { + ID = id; + TraceManagedModule = traceManagedModule; + } + + public readonly long ID; + public ModuleDesc Module; + public readonly TraceManagedModule TraceManagedModule; + } + + private readonly Dictionary _methods = new Dictionary(); + private readonly Dictionary _types = new Dictionary(); + private readonly Dictionary _modules = new Dictionary(); + private readonly object _lock = new object(); + + public TraceRuntimeDescToTypeSystemDesc(TraceProcess traceProcess, TypeSystemContext context, int clrInstanceID) + { + _traceProcess = traceProcess; + _context = context; + _clrInstanceID = clrInstanceID; + + foreach (var methodIDDetailsData in traceProcess.EventsInProcess.ByEventType()) + { + + MethodDescInfo currentInfo; + if (_methods.TryGetValue(methodIDDetailsData.MethodID, out currentInfo)) + { + if (currentInfo.MethodDetailsTraceData.LoaderModuleID != methodIDDetailsData.LoaderModuleID) + throw new Exception("Re-use of MethodID with different data. Unload scenario?)"); + if (currentInfo.MethodDetailsTraceData.MethodToken != methodIDDetailsData.MethodToken) + throw new Exception("Re-use of MethodID with different data. Unload scenario?)"); + if (currentInfo.MethodDetailsTraceData.TypeID != methodIDDetailsData.TypeID) + throw new Exception("Re-use of MethodID with different data. Unload scenario?)"); + if (currentInfo.MethodDetailsTraceData.TypeParameters.Length != methodIDDetailsData.TypeParameterCount) + throw new Exception("Re-use of MethodID with different data. 
Unload scenario?)"); + for (int ix = 0; ix < methodIDDetailsData.TypeParameterCount; ix++) + { + if (currentInfo.MethodDetailsTraceData.TypeParameters[ix] != (long)methodIDDetailsData.TypeParameters(ix)) + throw new Exception("Re-use of MethodID with different data. Unload scenario?)"); + } + continue; + } + + long[] typeParameters = Array.Empty(); + if (methodIDDetailsData.TypeParameterCount != 0) + { + typeParameters = new long[methodIDDetailsData.TypeParameterCount]; + for (int ix = 0; ix < typeParameters.Length; ix++) + { + typeParameters[ix] = (long)methodIDDetailsData.TypeParameters(ix); + } + } + else + { + typeParameters = Array.Empty(); + } + + TraceMethodData traceMethodData = new TraceMethodData(typeID: (long)methodIDDetailsData.TypeID, + loaderModuleID: methodIDDetailsData.LoaderModuleID, + methodToken: methodIDDetailsData.MethodToken, + typeParameters: typeParameters); + + currentInfo = new MethodDescInfo(methodIDDetailsData.MethodID, traceMethodData); + _methods.Add(methodIDDetailsData.MethodID, currentInfo); + } + + foreach (var bulkTypeTrace in traceProcess.EventsInProcess.ByEventType()) + { + if (bulkTypeTrace.ClrInstanceID != _clrInstanceID) + continue; + + for (int i = 0; i < bulkTypeTrace.Count; i++) + { + TypeHandleInfo currentInfo; + var typeTrace = bulkTypeTrace.Values(i); + + if (_types.TryGetValue((long)typeTrace.TypeID, out currentInfo)) + { + if (currentInfo.TypeValue.ModuleID != (long)typeTrace.ModuleID) + throw new Exception("Re-use of TypeID with different data. Unload scenario?)"); + if (currentInfo.TypeValue.TypeNameID != typeTrace.TypeNameID) + throw new Exception("Re-use of TypeID with different data. Unload scenario?)"); + if (currentInfo.TypeValue.Flags != typeTrace.Flags) + throw new Exception("Re-use of TypeID with different data. Unload scenario?)"); + if (currentInfo.TypeValue.CorElementType != typeTrace.CorElementType) + throw new Exception("Re-use of TypeID with different data. 
Unload scenario?)"); + if (currentInfo.TypeValue.TypeParameters.Length != typeTrace.TypeParameterCount) + throw new Exception("Re-use of TypeID with different data. Unload scenario?)"); + + for (int ix = 0; ix < typeTrace.TypeParameterCount; ix++) + { + if (currentInfo.TypeValue.TypeParameters[ix] != (long)typeTrace.TypeParameterID(ix)) + throw new Exception("Re-use of TypeID with different data. Unload scenario?)"); + } + continue; + } + + long[] typeParameters = Array.Empty(); + if (typeTrace.TypeParameterCount != 0) + { + typeParameters = new long[typeTrace.TypeParameterCount]; + for (int ix = 0; ix < typeParameters.Length; ix++) + { + typeParameters[ix] = (long)typeTrace.TypeParameterID(ix); + } + } + else + { + typeParameters = Array.Empty(); + } + TraceTypeData traceTypeData = new TraceTypeData(moduleID: (long)typeTrace.ModuleID, + typeNameID: typeTrace.TypeNameID, + flags: typeTrace.Flags, + corElementType: typeTrace.CorElementType, + typeParameters: typeParameters, + name: typeTrace.TypeName); + + currentInfo = new TypeHandleInfo((long)typeTrace.TypeID, traceTypeData); + _types.Add((long)typeTrace.TypeID, currentInfo); + } + } + + Dictionary assemblyToCLRInstanceIDMap = new Dictionary(); + foreach (var assemblyLoadTrace in _traceProcess.EventsInProcess.ByEventType()) + { + assemblyToCLRInstanceIDMap[assemblyLoadTrace.AssemblyID] = assemblyLoadTrace.ClrInstanceID; + } + + foreach (var moduleFile in _traceProcess.LoadedModules) + { + if (moduleFile is TraceManagedModule) + { + var managedModule = moduleFile as TraceManagedModule; + + int clrInstanceIDModule; + if (!assemblyToCLRInstanceIDMap.TryGetValue(managedModule.AssemblyID, out clrInstanceIDModule)) + continue; + + if (clrInstanceIDModule != _clrInstanceID) + continue; + + if (managedModule.ModuleFile != null) + { + ModuleDescInfo currentInfo; + if (_modules.TryGetValue(managedModule.ModuleID, out currentInfo)) + { + continue; + } + currentInfo = new ModuleDescInfo(managedModule.ModuleID, managedModule); 
+ _modules.Add(managedModule.ModuleID, currentInfo); + } + } + } + } + + public ModuleDesc ResolveModuleID(long handle, bool throwIfNotFound = true) + { + lock (_lock) + { + ModuleDescInfo minfo; + if (_modules.TryGetValue(handle, out minfo)) + { + if (minfo.Module != null) + return minfo.Module; + + string simpleName = minfo.TraceManagedModule.Name; + + if (!File.Exists(minfo.TraceManagedModule.FilePath) && minfo.TraceManagedModule.FilePath.EndsWith(".il.dll") && simpleName.EndsWith(".il")) + { + simpleName = simpleName.Substring(0, simpleName.Length - 3); + } + + minfo.Module = _context.ResolveAssembly(new AssemblyName(simpleName), throwIfNotFound); + return minfo.Module; + } + else + { + if (throwIfNotFound) + throw new Exception("Unknown ModuleID value"); + return null; + } + } + } + + public TypeDesc ResolveTypeHandle(long handle, bool throwIfNotFound = true) + { + lock(_lock) + { + TypeHandleInfo tinfo; + if (_types.TryGetValue(handle, out tinfo)) + { + if (tinfo.Type != null) + return tinfo.Type; + + if ((tinfo.TypeValue.Flags & Microsoft.Diagnostics.Tracing.Parsers.Clr.TypeFlags.Array) != 0) + { + if (tinfo.TypeValue.TypeParameters.Length != 1) + { + throw new Exception("Bad format for BulkType"); + } + + TypeDesc elementType = ResolveTypeHandle((long)tinfo.TypeValue.TypeParameters[0], throwIfNotFound); + if (elementType == null) + return null; + + if (tinfo.TypeValue.CorElementType == (byte)SignatureTypeCode.SZArray) + { + tinfo.Type = elementType.MakeArrayType(); + } + else + { + int rank = tinfo.TypeValue.Flags.GetArrayRank(); + tinfo.Type = elementType.MakeArrayType(rank); + } + } + else if (tinfo.TypeValue.CorElementType == (byte)SignatureTypeCode.ByReference) + { + if (tinfo.TypeValue.TypeParameters.Length != 1) + { + throw new Exception("Bad format for BulkType"); + } + + TypeDesc elementType = ResolveTypeHandle((long)tinfo.TypeValue.TypeParameters[0], throwIfNotFound); + if (elementType == null) + return null; + + tinfo.Type = 
elementType.MakeByRefType(); + } + else if (tinfo.TypeValue.CorElementType == (byte)SignatureTypeCode.Pointer) + { + if (tinfo.TypeValue.TypeParameters.Length != 1) + { + throw new Exception("Bad format for BulkType"); + } + + TypeDesc elementType = ResolveTypeHandle((long)tinfo.TypeValue.TypeParameters[0], throwIfNotFound); + if (elementType == null) + return null; + + tinfo.Type = elementType.MakePointerType(); + } + else if (tinfo.TypeValue.CorElementType == (byte)SignatureTypeCode.FunctionPointer) + { + tinfo.Type = null; // not resolved here; reported as unknown by the null check after this chain + } + else + { + // Must be class type or instantiated type. + ModuleDesc module = ResolveModuleID((long)tinfo.TypeValue.ModuleID, throwIfNotFound); + if (module == null) + return null; + + EcmaModule ecmaModule = module as EcmaModule; + if (ecmaModule == null) + { + if (throwIfNotFound) + throw new Exception($"Unable to resolve module for {handle:x8}"); // "x8": hexadecimal, zero-padded to at least 8 digits + return null; + } + + if ((tinfo.TypeValue.TypeNameID & 0xFF000000) != 0x02000000) // top byte of the token must be 0x02 + { + throw new Exception($"Invalid typedef {tinfo.TypeValue.TypeNameID:x8}"); // print the full 32-bit token in hex + } + + TypeDefinitionHandle typedef = MetadataTokens.TypeDefinitionHandle(tinfo.TypeValue.TypeNameID & 0xFFFFFF); + MetadataType uninstantiatedType = (MetadataType)ecmaModule.GetType(typedef); + // Instantiate the type if requested + if ((tinfo.TypeValue.TypeParameters.Length != 0) && uninstantiatedType != null) + { + if (uninstantiatedType.Instantiation.Length != tinfo.TypeValue.TypeParameters.Length) + { + throw new Exception($"Invalid TypeParameterCount {tinfo.TypeValue.TypeParameters.Length} expected {uninstantiatedType.Instantiation.Length} as needed by '{uninstantiatedType}'"); + } + + TypeDesc[] instantiation = new TypeDesc[tinfo.TypeValue.TypeParameters.Length]; + for (int i = 0; i < instantiation.Length; i++) + { + instantiation[i] = ResolveTypeHandle((long)tinfo.TypeValue.TypeParameters[i], throwIfNotFound); + if (instantiation[i] == null) + return null; + } + tinfo.Type = 
uninstantiatedType.Context.GetInstantiatedType(uninstantiatedType, new Instantiation(instantiation)); + } + else + { + if ((uninstantiatedType.Name == "__Canon") && uninstantiatedType.Namespace == "System" && (uninstantiatedType.Module == uninstantiatedType.Context.SystemModule)) + { + tinfo.Type = uninstantiatedType.Context.CanonType; + } + else + { + tinfo.Type = uninstantiatedType; + } + } + } + if (tinfo.Type == null) + { + if (throwIfNotFound) + throw new Exception("Unknown typeHandle value"); + return null; + } + return tinfo.Type; + } + else + { + if (throwIfNotFound) + throw new Exception("Unknown typeHandle value"); + return null; + } + } + } + + public MethodDesc ResolveMethodID(long handle, bool throwIfNotFound = true) // maps a trace MethodID to a MethodDesc; the result is cached on the MethodDescInfo entry + { + lock (_lock) + { + MethodDescInfo minfo; + if (_methods.TryGetValue(handle, out minfo)) + { + if (minfo.Method != null) + return minfo.Method; + + TypeDesc owningType = ResolveTypeHandle(minfo.MethodDetailsTraceData.TypeID, throwIfNotFound); + if (owningType == null) + return null; + + MetadataType owningMDType = owningType as MetadataType; + if (owningMDType == null) + throw new Exception("Method not parented by MetadataType"); + + if ((minfo.MethodDetailsTraceData.MethodToken & 0xFF000000) != 0x06000000) // top byte of the token must be 0x06 + { + throw new Exception($"Invalid methoddef {minfo.MethodDetailsTraceData.MethodToken:x8}"); // print the full 32-bit token in hex + } + + MethodDefinitionHandle methoddef = MetadataTokens.MethodDefinitionHandle(minfo.MethodDetailsTraceData.MethodToken & 0xFFFFFF); + + MethodDesc uninstantiatedMethod = null; + foreach (MethodDesc m in owningMDType.GetMethods()) + { + EcmaMethod ecmaMeth = m.GetTypicalMethodDefinition() as EcmaMethod; + if (ecmaMeth == null) + { + continue; + } + + if (ecmaMeth.Handle == methoddef) + { + uninstantiatedMethod = m; + break; + } + } + + if (uninstantiatedMethod == null) + { + if (throwIfNotFound) + { + EcmaType ecmaType = owningMDType.GetTypeDefinition() as EcmaType; + + throw new Exception($"Unknown MethodID value finding MethodDef 
{minfo.MethodDetailsTraceData.MethodToken:x} on type {owningMDType} from module {ecmaType.Module.Assembly.GetName().Name}"); + } + return null; + } + + // Instantiate the type if requested + if (minfo.MethodDetailsTraceData.TypeParameters.Length != 0) + { + if (uninstantiatedMethod.Instantiation.Length != minfo.MethodDetailsTraceData.TypeParameters.Length) + { + throw new Exception($"Invalid TypeParameterCount {minfo.MethodDetailsTraceData.TypeParameters.Length} expected {uninstantiatedMethod.Instantiation.Length} as needed by '{uninstantiatedMethod}'"); + } + + TypeDesc[] instantiation = new TypeDesc[minfo.MethodDetailsTraceData.TypeParameters.Length]; + for (int i = 0; i < instantiation.Length; i++) + { + instantiation[i] = ResolveTypeHandle((long)minfo.MethodDetailsTraceData.TypeParameters[i], throwIfNotFound); + if (instantiation[i] == null) + return null; + } + + minfo.Method = _context.GetInstantiatedMethod(uninstantiatedMethod, new Instantiation(instantiation)); + + if (minfo.Method == null) + { + if (throwIfNotFound) + { + StringBuilder s = new StringBuilder(); // comma-separated list of the instantiation arguments for the message + foreach (TypeDesc type in instantiation) + { + if (s.Length != 0) + s.Append(','); + s.Append(type); + } + throw new Exception($"Unable to instantiate {uninstantiatedMethod} over <{s}>"); // interpolated so the method and argument list appear in the message + } + return null; + } + } + else + { + minfo.Method = uninstantiatedMethod; + } + + if (minfo.Method == null) + { + if (throwIfNotFound) + throw new Exception("Unknown MethodID value"); + return null; + } + + return minfo.Method; + } + else + { + if (throwIfNotFound) + throw new Exception("Unknown MethodID value"); + return null; + } + } + } + } +} diff --git a/src/coreclr/src/tools/dotnet-pgo/TraceTypeSystemContext.cs b/src/coreclr/src/tools/dotnet-pgo/TraceTypeSystemContext.cs new file mode 100644 index 0000000000000..fc8078b3e5a35 --- /dev/null +++ b/src/coreclr/src/tools/dotnet-pgo/TraceTypeSystemContext.cs @@ -0,0 +1,363 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Diagnostics;
+using System.Reflection.PortableExecutable;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Text;
+using Internal.TypeSystem;
+using Internal.TypeSystem.Ecma;
+using Microsoft.Diagnostics.Tracing.Etlx;
+using System.IO;
+using System.IO.MemoryMappedFiles;
+
+using Microsoft.Diagnostics.Tracing.Parsers.Clr;
+using System.Reflection.Metadata;
+
+namespace Microsoft.Diagnostics.Tools.Pgo
+{
+    /// <summary>
+    /// Type system context whose assemblies are located through the managed modules that a
+    /// traced process loaded, filtered to a single CLR instance.
+    /// </summary>
+    class TraceTypeSystemContext : MetadataTypeSystemContext, IMetadataStringDecoderProvider
+    {
+        private readonly PgoTraceProcess _pgoTraceProcess;
+        private readonly ModuleLoadLogger _moduleLoadLogger;
+        private readonly int _clrInstanceID;
+
+        public TraceTypeSystemContext(PgoTraceProcess traceProcess, int clrInstanceID, Logger logger)
+        {
+            _pgoTraceProcess = traceProcess;
+            _clrInstanceID = clrInstanceID;
+            _moduleLoadLogger = new ModuleLoadLogger(logger);
+        }
+
+        /// <summary>
+        /// Locates System.Private.CoreLib among the traced modules and registers it as the system module.
+        /// </summary>
+        /// <returns>false when System.Private.CoreLib could not be found or loaded; true otherwise.</returns>
+        public bool Initialize()
+        {
+            ModuleDesc systemModule = GetModuleForSimpleName("System.Private.CoreLib", false);
+            if (systemModule == null)
+                return false;
+            SetSystemModule(systemModule);
+            return true;
+        }
+
+        public override bool SupportsCanon => true;
+
+        public override bool SupportsUniversalCanon => false;
+
+        // Everything tracked for one loaded module.
+        private class ModuleData
+        {
+            public string SimpleName;
+            public string FilePath;
+
+            public EcmaModule Module;
+            public MemoryMappedViewAccessor MappedViewAccessor;
+        }
+
+        // Modules keyed by EcmaModule reference identity.
+        private class ModuleHashtable : LockFreeReaderHashtable<EcmaModule, ModuleData>
+        {
+            protected override int GetKeyHashCode(EcmaModule key)
+            {
+                return key.GetHashCode();
+            }
+            protected override int GetValueHashCode(ModuleData value)
+            {
+                return value.Module.GetHashCode();
+            }
+            protected override bool CompareKeyToValue(EcmaModule key, ModuleData value)
+            {
+                return Object.ReferenceEquals(key, value.Module);
+            }
+            protected override bool CompareValueToValue(ModuleData value1, ModuleData value2)
+            {
+                return Object.ReferenceEquals(value1.Module, value2.Module);
+            }
+            protected override ModuleData CreateValueFromKey(EcmaModule key)
+            {
+                Debug.Fail("CreateValueFromKey not supported");
+                return null;
+            }
+        }
+        private readonly ModuleHashtable _moduleHashtable = new ModuleHashtable();
+
+        // Modules usable for binding, keyed by case-insensitive assembly simple name.
+        private class SimpleNameHashtable : LockFreeReaderHashtable<string, ModuleData>
+        {
+            private readonly StringComparer _comparer = StringComparer.OrdinalIgnoreCase;
+
+            protected override int GetKeyHashCode(string key)
+            {
+                return _comparer.GetHashCode(key);
+            }
+            protected override int GetValueHashCode(ModuleData value)
+            {
+                return _comparer.GetHashCode(value.SimpleName);
+            }
+            protected override bool CompareKeyToValue(string key, ModuleData value)
+            {
+                return _comparer.Equals(key, value.SimpleName);
+            }
+            protected override bool CompareValueToValue(ModuleData value1, ModuleData value2)
+            {
+                return _comparer.Equals(value1.SimpleName, value2.SimpleName);
+            }
+            protected override ModuleData CreateValueFromKey(string key)
+            {
+                Debug.Fail("CreateValueFromKey not supported");
+                return null;
+            }
+        }
+        private readonly SimpleNameHashtable _simpleNameHashtable = new SimpleNameHashtable();
+
+        public override ModuleDesc ResolveAssembly(System.Reflection.AssemblyName name, bool throwIfNotFound)
+        {
+            // TODO: catch typesystem BadImageFormatException and throw a new one that also captures the
+            // assembly name that caused the failure. (Along with the reason, which makes this rather annoying).
+            return GetModuleForSimpleName(name.Name, throwIfNotFound);
+        }
+
+        /// <summary>
+        /// Returns the module with the given assembly simple name, loading it on first use from the
+        /// on-disk path computed for the matching module of the traced process.
+        /// </summary>
+        /// <param name="simpleName">Assembly simple name (compared case-insensitively in the cache).</param>
+        /// <param name="throwIfNotFound">
+        /// When false, a missing module or any load exception results in null; when true, they throw.
+        /// </param>
+        public ModuleDesc GetModuleForSimpleName(string simpleName, bool throwIfNotFound = true)
+        {
+            ModuleData existing;
+            if (_simpleNameHashtable.TryGetValue(simpleName, out existing))
+                return existing.Module;
+
+            string filePath = null;
+
+            foreach (var module in _pgoTraceProcess.EnumerateLoadedManagedModules())
+            {
+                var managedModule = module.ManagedModule;
+
+                if (module.ClrInstanceID != _clrInstanceID)
+                    continue;
+
+                if (PgoTraceProcess.CompareModuleAgainstSimpleName(simpleName, managedModule))
+                {
+                    filePath = PgoTraceProcess.ComputeFilePathOnDiskForModule(managedModule);
+                    break;
+                }
+            }
+
+            if (filePath == null)
+            {
+                // TODO: the exception is wrong for two reasons: for one, this should be assembly full name, not simple name.
+                // The other reason is that on CoreCLR, the exception also captures the reason. We should be passing two
+                // string IDs. This makes this rather annoying.
+
+                _moduleLoadLogger.LogModuleLoadFailure(simpleName);
+
+                if (throwIfNotFound)
+                    ThrowHelper.ThrowFileNotFoundException(ExceptionStringID.FileLoadErrorGeneric, simpleName);
+
+                return null;
+            }
+
+            // The flag ensures exactly one success/failure log entry per attempt, including when
+            // an exception propagates through the finally block.
+            bool succeededOrReportedError = false;
+            try
+            {
+                ModuleDesc returnValue = AddModule(filePath, simpleName, null, true);
+                _moduleLoadLogger.LogModuleLoadSuccess(simpleName, filePath);
+                succeededOrReportedError = true;
+                return returnValue;
+            }
+            catch (Exception) when (!throwIfNotFound)
+            {
+                _moduleLoadLogger.LogModuleLoadFailure(simpleName, filePath);
+                succeededOrReportedError = true;
+                return null;
+            }
+            finally
+            {
+                if (!succeededOrReportedError)
+                {
+                    _moduleLoadLogger.LogModuleLoadFailure(simpleName, filePath);
+                }
+            }
+        }
+
+        /// <summary>Loads (or returns the already loaded) module at the path and registers it for binding by simple name.</summary>
+        public EcmaModule GetModuleFromPath(string filePath)
+        {
+            return GetOrAddModuleFromPath(filePath, null, true);
+        }
+
+        /// <summary>Loads (or returns the already loaded) module at the path without registering it for binding.</summary>
+        public EcmaModule GetMetadataOnlyModuleFromPath(string filePath)
+        {
+            return GetOrAddModuleFromPath(filePath, null, false);
+        }
+
+        /// <summary>Same as <see cref="GetMetadataOnlyModuleFromPath"/>, but reads the image from the supplied bytes.</summary>
+        public EcmaModule GetMetadataOnlyModuleFromMemory(string filePath, byte[] moduleData)
+        {
+            return GetOrAddModuleFromPath(filePath, moduleData, false);
+        }
+
+        private EcmaModule GetOrAddModuleFromPath(string filePath, byte[] moduleData, bool useForBinding)
+        {
+            // This method is not expected to be called frequently. Linear search is acceptable.
+            foreach (var entry in ModuleHashtable.Enumerator.Get(_moduleHashtable))
+            {
+                if (entry.FilePath == filePath)
+                    return entry.Module;
+            }
+
+            bool succeeded = false;
+            try
+            {
+                EcmaModule returnValue = AddModule(filePath, null, moduleData, useForBinding);
+                _moduleLoadLogger.LogModuleLoadSuccess(returnValue.Assembly.GetName().Name, filePath);
+                succeeded = true;
+                return returnValue;
+            }
+            finally
+            {
+                if (!succeeded)
+                {
+                    _moduleLoadLogger.LogModuleLoadFailure(Path.GetFileNameWithoutExtension(filePath), filePath);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Opens a PE image either from <paramref name="moduleBytes"/> (when non-null) or by memory
+        /// mapping <paramref name="filePath"/>.
+        /// </summary>
+        /// <param name="mappedViewAccessor">
+        /// Receives the view backing the returned reader when the file was mapped (the caller must
+        /// keep it alive and dispose it); null when the image came from <paramref name="moduleBytes"/>.
+        /// </param>
+        public static unsafe PEReader OpenPEFile(string filePath, byte[] moduleBytes, out MemoryMappedViewAccessor mappedViewAccessor)
+        {
+            // If moduleBytes is specified create PEReader from the in memory array, not from a file on disk
+            if (moduleBytes != null)
+            {
+                var peReader = new PEReader(ImmutableArray.Create(moduleBytes));
+                mappedViewAccessor = null;
+                return peReader;
+            }
+
+            // System.Reflection.Metadata has heuristic that tries to save virtual address space. This heuristic does not work
+            // well for us since it can make IL access very slow (call to OS for each method IL query). We will map the file
+            // ourselves to get the desired performance characteristics reliably.
+
+            FileStream fileStream = null;
+            MemoryMappedFile mappedFile = null;
+            MemoryMappedViewAccessor accessor = null;
+            try
+            {
+                // Create stream because CreateFromFile(string, ...) uses FileShare.None which is too strict
+                fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, false);
+                mappedFile = MemoryMappedFile.CreateFromFile(
+                    fileStream, null, fileStream.Length, MemoryMappedFileAccess.Read, HandleInheritability.None, true);
+                accessor = mappedFile.CreateViewAccessor(0, 0, MemoryMappedFileAccess.Read);
+
+                var safeBuffer = accessor.SafeMemoryMappedViewHandle;
+                var peReader = new PEReader((byte*)safeBuffer.DangerousGetHandle(), (int)safeBuffer.ByteLength);
+
+                // MemoryMappedFile does not need to be kept around. MemoryMappedViewAccessor is enough.
+
+                mappedViewAccessor = accessor;
+                accessor = null;
+
+                return peReader;
+            }
+            finally
+            {
+                if (accessor != null)
+                    accessor.Dispose();
+                if (mappedFile != null)
+                    mappedFile.Dispose();
+                if (fileStream != null)
+                    fileStream.Dispose();
+            }
+        }
+
+        // Creates an EcmaModule for the image and records it in the module table (and, when
+        // useForBinding is set, in the simple-name table). If another module with the same simple
+        // name and path is already registered, that module is returned and the resources opened
+        // here are disposed by the finally block.
+        // NOTE(review): expectedSimpleName is currently unused.
+        private EcmaModule AddModule(string filePath, string expectedSimpleName, byte[] moduleDataBytes, bool useForBinding)
+        {
+            MemoryMappedViewAccessor mappedViewAccessor = null;
+            PdbSymbolReader pdbReader = null;
+            try
+            {
+                PEReader peReader = OpenPEFile(filePath, moduleDataBytes, out mappedViewAccessor);
+                pdbReader = OpenAssociatedSymbolFile(filePath, peReader);
+
+                EcmaModule module = EcmaModule.Create(this, peReader, containingAssembly: null, pdbReader);
+
+                MetadataReader metadataReader = module.MetadataReader;
+                string simpleName = metadataReader.GetString(metadataReader.GetAssemblyDefinition().Name);
+
+                ModuleData moduleData = new ModuleData()
+                {
+                    SimpleName = simpleName,
+                    FilePath = filePath,
+                    Module = module,
+                    MappedViewAccessor = mappedViewAccessor
+                };
+
+                // NOTE(review): locking on 'this' is visible to outside code; a private lock object
+                // would be safer if nothing else synchronizes on the context - confirm before changing.
+                lock (this)
+                {
+                    if (useForBinding)
+                    {
+                        ModuleData actualModuleData = _simpleNameHashtable.AddOrGetExisting(moduleData);
+                        if (actualModuleData != moduleData)
+                        {
+                            if (actualModuleData.FilePath != filePath)
+                                throw new FileNotFoundException("Module with same simple name already exists " + filePath);
+                            return actualModuleData.Module;
+                        }
+                    }
+                    mappedViewAccessor = null; // Ownership has been transferred
+                    pdbReader = null; // Ownership has been transferred
+
+                    _moduleHashtable.AddOrGetExisting(moduleData);
+                }
+
+                return module;
+            }
+            finally
+            {
+                if (mappedViewAccessor != null)
+                    mappedViewAccessor.Dispose();
+                if (pdbReader != null)
+                    pdbReader.Dispose();
+            }
+        }
+
+
+        //
+        // Symbols
+        //
+
+        // Looks for a portable PDB next to the binary, then at the rooted path recorded in the
+        // image's CodeView debug directory entry. Returns null when no candidate file exists.
+        private PdbSymbolReader OpenAssociatedSymbolFile(string peFilePath, PEReader peReader)
+        {
+            // Assume that the .pdb file is next to the binary
+            var pdbFilename = Path.ChangeExtension(peFilePath, ".pdb");
+
+            if (!File.Exists(pdbFilename))
+            {
+                pdbFilename = null;
+
+                // If the file doesn't exist, try the path specified in the CodeView section of the image
+                foreach (DebugDirectoryEntry debugEntry in peReader.ReadDebugDirectory())
+                {
+                    if (debugEntry.Type != DebugDirectoryEntryType.CodeView)
+                        continue;
+
+                    string candidateFileName = peReader.ReadCodeViewDebugDirectoryData(debugEntry).Path;
+                    if (Path.IsPathRooted(candidateFileName) && File.Exists(candidateFileName))
+                    {
+                        pdbFilename = candidateFileName;
+                        break;
+                    }
+                }
+
+                if (pdbFilename == null)
+                    return null;
+            }
+
+            // Try to open the symbol file as portable pdb first
+            PdbSymbolReader reader = PortablePdbSymbolReader.TryOpen(pdbFilename, GetMetadataStringDecoder());
+
+            return reader;
+        }
+
+
+        private MetadataStringDecoder _metadataStringDecoder;
+
+        /// <summary>Returns the string decoder shared by this context, creating it on first use.</summary>
+        public MetadataStringDecoder GetMetadataStringDecoder()
+        {
+            if (_metadataStringDecoder == null)
+                _metadataStringDecoder = new CachingMetadataStringDecoder(0x10000); // TODO: Tune the size
+            return _metadataStringDecoder;
+        }
+    }
+}
diff --git a/src/coreclr/src/tools/dotnet-pgo/TypeSystemMetadataEmitter.cs b/src/coreclr/src/tools/dotnet-pgo/TypeSystemMetadataEmitter.cs
new file mode 100644
index 0000000000000..f6fdc3839c627
--- /dev/null
+++ b/src/coreclr/src/tools/dotnet-pgo/TypeSystemMetadataEmitter.cs
@@ -0,0 +1,375 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.IO;
+using System.Reflection;
+using System.Reflection.Metadata;
+using System.Reflection.Metadata.Ecma335;
+using System.Reflection.PortableExecutable;
+using Internal.TypeSystem;
+
+namespace Microsoft.Diagnostics.Tools.Pgo
+{
+    /// <summary>
+    /// Builds a small managed PE image whose metadata references type system entities
+    /// (assemblies, types, methods) and whose only definitions are global methods.
+    /// </summary>
+    class TypeSystemMetadataEmitter
+    {
+        MetadataBuilder _metadataBuilder;
+        BlobBuilder _ilBuilder;
+        MethodBodyStreamEncoder _methodBodyStream;
+        // Caches so that each entity is emitted into the metadata tables only once.
+        Dictionary<IAssemblyDesc, AssemblyReferenceHandle> _assemblyRefs = new Dictionary<IAssemblyDesc, AssemblyReferenceHandle>();
+        Dictionary<TypeDesc, EntityHandle> _typeRefs = new Dictionary<TypeDesc, EntityHandle>();
+        Dictionary<MethodDesc, EntityHandle> _methodRefs = new Dictionary<MethodDesc, EntityHandle>();
+        Blob _mvidFixup;
+        BlobHandle _noArgsVoidReturnStaticMethodSigHandle;
+
+        /// <summary>
+        /// Starts a new assembly with the given name and pre-registers a reference to System.__Canon.
+        /// </summary>
+        /// <param name="assemblyName">Name (and optional version) of the emitted assembly.</param>
+        /// <param name="context">Type system context supplying the canonical type.</param>
+        /// <param name="flags">Assembly flags recorded in the assembly definition.</param>
+        /// <exception cref="ArgumentException">The name specifies a culture or a public key token.</exception>
+        public TypeSystemMetadataEmitter(AssemblyName assemblyName, TypeSystemContext context, AssemblyFlags flags = default(AssemblyFlags))
+        {
+            _metadataBuilder = new MetadataBuilder();
+            _ilBuilder = new BlobBuilder();
+            _methodBodyStream = new MethodBodyStreamEncoder(_ilBuilder);
+            StringHandle assemblyNameHandle = _metadataBuilder.GetOrAddString(assemblyName.Name);
+            if (assemblyName.CultureName != null)
+                throw new ArgumentException("Assembly name must not specify a culture.", nameof(assemblyName));
+
+            if (assemblyName.GetPublicKeyToken() != null)
+                throw new ArgumentException("Assembly name must not specify a public key token.", nameof(assemblyName));
+
+            // The MVID is only known after serialization; reserve space and patch it in SerializeToStream.
+            var mvid = _metadataBuilder.ReserveGuid();
+            _mvidFixup = mvid.Content;
+
+            _metadataBuilder.AddModule(0, assemblyNameHandle, mvid.Handle, default(GuidHandle), default(GuidHandle));
+            _metadataBuilder.AddAssembly(assemblyNameHandle, assemblyName.Version ?? new Version(0,0,0,0), default(StringHandle), default(BlobHandle), flags, AssemblyHashAlgorithm.None);
+
+            var canonAssemblyNameHandle = _metadataBuilder.GetOrAddString("System.Private.Canon");
+            var canonAssemblyRef = _metadataBuilder.AddAssemblyReference(canonAssemblyNameHandle, new Version(0, 0, 0, 0), default(StringHandle), default(BlobHandle), (AssemblyFlags)0, default(BlobHandle));
+            var systemStringHandle = _metadataBuilder.GetOrAddString("System");
+            var canonStringHandle = _metadataBuilder.GetOrAddString("__Canon");
+            var canonTypeRef = _metadataBuilder.AddTypeReference(canonAssemblyRef, systemStringHandle, canonStringHandle);
+            _typeRefs.Add(context.CanonType, canonTypeRef);
+
+            // First TypeDef row: the <Module> pseudo-type that owns the global methods.
+            _metadataBuilder.AddTypeDefinition(
+                default(TypeAttributes),
+                default(StringHandle),
+                _metadataBuilder.GetOrAddString("<Module>"),
+                baseType: default(EntityHandle),
+                fieldList: MetadataTokens.FieldDefinitionHandle(1),
+                methodList: MetadataTokens.MethodDefinitionHandle(1));
+
+            // Signature shared by every global method: static, no parameters, returns void.
+            BlobBuilder noArgsNoReturnStaticMethodSig = new BlobBuilder();
+            BlobEncoder signatureEncoder = new BlobEncoder(noArgsNoReturnStaticMethodSig);
+
+            signatureEncoder.MethodSignature(SignatureCallingConvention.Default, 0, false);
+            noArgsNoReturnStaticMethodSig.WriteCompressedInteger(0);
+            noArgsNoReturnStaticMethodSig.WriteByte((byte)SignatureTypeCode.Void);
+            _noArgsVoidReturnStaticMethodSigHandle = _metadataBuilder.GetOrAddBlob(noArgsNoReturnStaticMethodSig);
+        }
+
+        /// <summary>Adds a public static void() global method with the given IL body.</summary>
+        public MethodDefinitionHandle AddGlobalMethod(string name, InstructionEncoder il, int maxStack)
+        {
+            int methodILOffset = _methodBodyStream.AddMethodBody(il, maxStack);
+            return _metadataBuilder.AddMethodDefinition(MethodAttributes.Public | MethodAttributes.Static,
+                MethodImplAttributes.IL, _metadataBuilder.GetOrAddString(name),
+                _noArgsVoidReturnStaticMethodSigHandle,
+                methodILOffset,
+                default(ParameterHandle));
+        }
+
+        // Fixed content id, so every emitted image gets the same MVID and stamp.
+        private static readonly Guid s_guid = new Guid("97F4DBD4-F6D1-4FAD-91B3-1001F92068E5");
+        private static readonly BlobContentId s_contentId = new BlobContentId(s_guid, 0x04030201);
+
+        /// <summary>Serializes the PE image built so far to <paramref name="peStream"/>.</summary>
+        public void SerializeToStream(Stream peStream)
+        {
+            var peHeaderBuilder = new PEHeaderBuilder();
+            var peBuilder = new ManagedPEBuilder(peHeaderBuilder, new MetadataRootBuilder(_metadataBuilder), _ilBuilder,
+                deterministicIdProvider: content => s_contentId);
+
+            var peBlob = new BlobBuilder();
+            var contentId = peBuilder.Serialize(peBlob);
+            new BlobWriter(_mvidFixup).WriteGuid(contentId.Guid);
+            peBlob.WriteContentTo(peStream);
+        }
+
+        /// <summary>Returns (adding it on first use) the assembly reference for <paramref name="assemblyDesc"/>.</summary>
+        public AssemblyReferenceHandle GetAssemblyRef(IAssemblyDesc assemblyDesc)
+        {
+            if (_assemblyRefs.TryGetValue(assemblyDesc, out var handle))
+            {
+                return handle;
+            }
+            AssemblyName name = assemblyDesc.GetName();
+            StringHandle assemblyName = _metadataBuilder.GetOrAddString(name.Name);
+            StringHandle cultureName = (name.CultureName != null) ? _metadataBuilder.GetOrAddString(name.CultureName) : default(StringHandle);
+            BlobHandle publicTokenBlob = name.GetPublicKeyToken() != null ? _metadataBuilder.GetOrAddBlob(name.GetPublicKeyToken()) : default(BlobHandle);
+            AssemblyFlags flags = default(AssemblyFlags);
+            if (name.Flags.HasFlag(AssemblyNameFlags.Retargetable))
+            {
+                flags |= AssemblyFlags.Retargetable;
+            }
+            if (name.ContentType == AssemblyContentType.WindowsRuntime)
+            {
+                flags |= AssemblyFlags.WindowsRuntime;
+            }
+
+            // AssemblyName.Version may be null; fall back to 0.0.0.0 as the constructor does.
+            var referenceHandle = _metadataBuilder.AddAssemblyReference(assemblyName, name.Version ?? new Version(0, 0, 0, 0), cultureName, publicTokenBlob, flags, default(BlobHandle));
+            _assemblyRefs.Add(assemblyDesc, referenceHandle);
+            return referenceHandle;
+        }
+
+        /// <summary>
+        /// Returns (adding it on first use) a TypeRef for a type definition, or a TypeSpec for any
+        /// other metadata type such as a generic instantiation.
+        /// </summary>
+        /// <exception cref="ArgumentException">The type is a parameterized type or a function pointer.</exception>
+        public EntityHandle GetTypeRef(MetadataType type)
+        {
+            if (_typeRefs.TryGetValue(type, out var handle))
+            {
+                return handle;
+            }
+
+            if (type.IsParameterizedType)
+            {
+                throw new ArgumentException("Parameterized types cannot be referenced by a type reference.", nameof(type));
+            }
+            else if (type.IsFunctionPointer)
+            {
+                throw new ArgumentException("Function pointer types cannot be referenced by a type reference.", nameof(type));
+            }
+
+            EntityHandle typeHandle;
+
+            if (type.IsTypeDefinition)
+            {
+                // Make a typeref
+                StringHandle typeName = _metadataBuilder.GetOrAddString(type.Name);
+                StringHandle typeNamespace = type.Namespace != null ? _metadataBuilder.GetOrAddString(type.Namespace) : default(StringHandle);
+                EntityHandle resolutionScope;
+
+                if (type.ContainingType == null)
+                {
+                    // non-nested type
+                    resolutionScope = GetAssemblyRef(type.Module.Assembly);
+                }
+                else
+                {
+                    // nested type
+                    resolutionScope = GetTypeRef((MetadataType)type.ContainingType);
+                }
+
+                typeHandle = _metadataBuilder.AddTypeReference(resolutionScope, typeNamespace, typeName);
+            }
+            else
+            {
+                var typeSpecSignature = new BlobBuilder();
+                EncodeType(typeSpecSignature, type);
+                var blobSigHandle = _metadataBuilder.GetOrAddBlob(typeSpecSignature);
+                typeHandle = _metadataBuilder.AddTypeSpecification(blobSigHandle);
+            }
+
+            _typeRefs.Add(type, typeHandle);
+            return typeHandle;
+        }
+
+        /// <summary>
+        /// Returns (adding it on first use) a MemberRef for the method, or a MethodSpec over the
+        /// MemberRef of its definition when the method is an instantiated generic method.
+        /// </summary>
+        public EntityHandle GetMethodRef(MethodDesc method)
+        {
+            if (_methodRefs.TryGetValue(method, out var handle))
+            {
+                return handle;
+            }
+
+            EntityHandle methodHandle;
+
+            if (method.HasInstantiation && (method.GetMethodDefinition() != method))
+            {
+                EntityHandle uninstantiatedHandle = GetMethodRef(method.GetMethodDefinition());
+                BlobBuilder methodSpecSig = new BlobBuilder();
+                BlobEncoder methodSpecEncoder = new BlobEncoder(methodSpecSig);
+                methodSpecEncoder.MethodSpecificationSignature(method.Instantiation.Length);
+                foreach (var type in method.Instantiation)
+                    EncodeType(methodSpecSig, type);
+
+                var methodSpecSigHandle = _metadataBuilder.GetOrAddBlob(methodSpecSig);
+                methodHandle = _metadataBuilder.AddMethodSpecification(uninstantiatedHandle, methodSpecSigHandle);
+            }
+            else
+            {
+                EntityHandle typeHandle = GetTypeRef((MetadataType)method.OwningType);
+                StringHandle methodName = _metadataBuilder.GetOrAddString(method.Name);
+                var sig = method.GetTypicalMethodDefinition().Signature;
+
+                BlobBuilder memberRefSig = new BlobBuilder();
+                EncodeMethodSignature(memberRefSig, sig);
+
+                var sigBlob = _metadataBuilder.GetOrAddBlob(memberRefSig);
+                methodHandle = _metadataBuilder.AddMemberReference(typeHandle, methodName, sigBlob);
+            }
+
+            _methodRefs.Add(method, methodHandle);
+            return methodHandle;
+        }
+
+        // Appends the signature encoding of 'type' to blobBuilder.
+        private void EncodeType(BlobBuilder blobBuilder, TypeDesc type)
+        {
+            if (type.IsPrimitive)
+            {
+                SignatureTypeCode primitiveCode;
+                switch (type.Category)
+                {
+                    case TypeFlags.Void:
+                        primitiveCode = SignatureTypeCode.Void;
+                        break;
+                    case TypeFlags.Boolean:
+                        primitiveCode = SignatureTypeCode.Boolean;
+                        break;
+                    case TypeFlags.Char:
+                        primitiveCode = SignatureTypeCode.Char;
+                        break;
+                    case TypeFlags.SByte:
+                        primitiveCode = SignatureTypeCode.SByte;
+                        break;
+                    case TypeFlags.Byte:
+                        primitiveCode = SignatureTypeCode.Byte;
+                        break;
+                    case TypeFlags.Int16:
+                        primitiveCode = SignatureTypeCode.Int16;
+                        break;
+                    case TypeFlags.UInt16:
+                        primitiveCode = SignatureTypeCode.UInt16;
+                        break;
+                    case TypeFlags.Int32:
+                        primitiveCode = SignatureTypeCode.Int32;
+                        break;
+                    case TypeFlags.UInt32:
+                        primitiveCode = SignatureTypeCode.UInt32;
+                        break;
+                    case TypeFlags.Int64:
+                        primitiveCode = SignatureTypeCode.Int64;
+                        break;
+                    case TypeFlags.UInt64:
+                        primitiveCode = SignatureTypeCode.UInt64;
+                        break;
+                    case TypeFlags.IntPtr:
+                        primitiveCode = SignatureTypeCode.IntPtr;
+                        break;
+                    case TypeFlags.UIntPtr:
+                        primitiveCode = SignatureTypeCode.UIntPtr;
+                        break;
+                    case TypeFlags.Single:
+                        primitiveCode = SignatureTypeCode.Single;
+                        break;
+                    case TypeFlags.Double:
+                        primitiveCode = SignatureTypeCode.Double;
+                        break;
+                    default:
+                        throw new Exception("Unknown primitive type");
+                }
+
+                blobBuilder.WriteByte((byte)primitiveCode);
+            }
+            else if (type.IsSzArray)
+            {
+                blobBuilder.WriteByte((byte)SignatureTypeCode.SZArray);
+                EncodeType(blobBuilder, type.GetParameterType());
+            }
+            else if (type.IsArray)
+            {
+                var arrayType = (ArrayType)type;
+                blobBuilder.WriteByte((byte)SignatureTypeCode.Array);
+                EncodeType(blobBuilder, type.GetParameterType());
+                var shapeEncoder = new ArrayShapeEncoder(blobBuilder);
+                // TODO Add support for non-standard array shapes
+                // Shape rejects a default 'sizes' array, so pass an empty one (no sizes specified).
+                // A default 'lowerBounds' array encodes a zero lower bound for every dimension.
+                shapeEncoder.Shape(arrayType.Rank, ImmutableArray<int>.Empty, default(ImmutableArray<int>));
+            }
+            else if (type.IsPointer)
+            {
+                blobBuilder.WriteByte((byte)SignatureTypeCode.Pointer);
+                EncodeType(blobBuilder, type.GetParameterType());
+            }
+            else if (type.IsFunctionPointer)
+            {
+                // A function pointer is encoded as the FNPTR type code followed by the method signature.
+                blobBuilder.WriteByte((byte)SignatureTypeCode.FunctionPointer);
+                FunctionPointerType fnptrType = (FunctionPointerType)type;
+                EncodeMethodSignature(blobBuilder, fnptrType.Signature);
+            }
+            else if (type.IsByRef)
+            {
+                blobBuilder.WriteByte((byte)SignatureTypeCode.ByReference);
+                EncodeType(blobBuilder, type.GetParameterType());
+            }
+            else if (type.IsObject)
+            {
+                blobBuilder.WriteByte((byte)SignatureTypeCode.Object);
+            }
+            else if (type.IsString)
+            {
+                blobBuilder.WriteByte((byte)SignatureTypeCode.String);
+            }
+            else if (type.IsWellKnownType(WellKnownType.TypedReference))
+            {
+                blobBuilder.WriteByte((byte)SignatureTypeCode.TypedReference);
+            }
+            else if (type.IsWellKnownType(WellKnownType.Void))
+            {
+                blobBuilder.WriteByte((byte)SignatureTypeCode.Void);
+            }
+            else if (type is SignatureVariable)
+            {
+                SignatureVariable sigVar = (SignatureVariable)type;
+                SignatureTypeCode code = sigVar.IsMethodSignatureVariable ? SignatureTypeCode.GenericMethodParameter : SignatureTypeCode.GenericTypeParameter;
+                blobBuilder.WriteByte((byte)code);
+                blobBuilder.WriteCompressedInteger(sigVar.Index);
+            }
+            else if (type is InstantiatedType)
+            {
+                blobBuilder.WriteByte((byte)SignatureTypeCode.GenericTypeInstance);
+                EncodeType(blobBuilder, type.GetTypeDefinition());
+                blobBuilder.WriteCompressedInteger(type.Instantiation.Length);
+                foreach (var instantiationArg in type.Instantiation)
+                    EncodeType(blobBuilder, instantiationArg);
+            }
+            else if (type is MetadataType)
+            {
+                var metadataType = (MetadataType)type;
+                // Must be class or valuetype
+                blobBuilder.WriteByte(type.IsValueType ? (byte)SignatureTypeKind.ValueType : (byte)SignatureTypeKind.Class);
+                int codedIndex = CodedIndex.TypeDefOrRef(GetTypeRef(metadataType));
+                blobBuilder.WriteCompressedInteger(codedIndex);
+            }
+            else
+            {
+                throw new Exception("Unexpected type");
+            }
+        }
+
+        // Appends a method signature: header, parameter count, return type, then each parameter type.
+        void EncodeMethodSignature(BlobBuilder signatureBuilder, MethodSignature sig)
+        {
+            BlobEncoder signatureEncoder = new BlobEncoder(signatureBuilder);
+            int genericParameterCount = sig.GenericParameterCount;
+            bool isInstanceMethod = !sig.IsStatic;
+            SignatureCallingConvention sigCallingConvention = SignatureCallingConvention.Default;
+            switch (sig.Flags & MethodSignatureFlags.UnmanagedCallingConventionMask)
+            {
+                case MethodSignatureFlags.CallingConventionVarargs:
+                    sigCallingConvention = SignatureCallingConvention.VarArgs;
+                    break;
+                case MethodSignatureFlags.UnmanagedCallingConventionCdecl:
+                    sigCallingConvention = SignatureCallingConvention.CDecl;
+                    break;
+                case MethodSignatureFlags.UnmanagedCallingConventionStdCall:
+                    sigCallingConvention = SignatureCallingConvention.StdCall;
+                    break;
+                case MethodSignatureFlags.UnmanagedCallingConventionThisCall:
+                    sigCallingConvention = SignatureCallingConvention.ThisCall;
+                    break;
+            }
+
+            signatureEncoder.MethodSignature(sigCallingConvention, genericParameterCount, isInstanceMethod);
+            signatureBuilder.WriteCompressedInteger(sig.Length);
+            // TODO Process custom modifiers in some way
+            EncodeType(signatureBuilder, sig.ReturnType);
+            for (int i = 0; i < sig.Length; i++)
+                EncodeType(signatureBuilder, sig[i]);
+        }
+
+        /// <summary>Returns (adding it on first use) the user string heap handle for <paramref name="userString"/>.</summary>
+        public UserStringHandle GetUserStringHandle(string userString)
+        {
+            return _metadataBuilder.GetOrAddUserString(userString);
+        }
+    }
+}
diff --git a/src/coreclr/src/tools/dotnet-pgo/dotnet-pgo-experiment.md b/src/coreclr/src/tools/dotnet-pgo/dotnet-pgo-experiment.md
new file mode 100644
index 0000000000000..c47d9d227b404
--- /dev/null
+++ b/src/coreclr/src/tools/dotnet-pgo/dotnet-pgo-experiment.md
@@ -0,0 +1,238 @@
+# Experiments towards a Profile Data
pipeline for .NET +----- +The .NET Runtime has a long history of providing instrumentation based profile guided optimization +for use internally at Microsoft, and for scenarios involving extremely high value customers. To +this end the team built the IBC (instrumented block count) infrastructure into the runtime/ngen, +and IBCMerge as a tool for manipulating .ibc files. Over the last few years, the structure of these +technologies and tools has shown that they are not ideal for customer use or even internal use, and +a new tech would be valuable in this space. + +To that end, I have produced this proposal for providing a new trace based profile guided optimization +for .NET. The proposal aims to solve the major issues with our current pgo technology, and leverage +the investments that the .NET team has made over the last few years to make it possible. + +The design is based on the following principles. + +1. Consumption of pgo data for use in an application shall be as simple as adding a single line to +a csproj file or build script. +2. Storage format of pgo data shall be version resilient. +3. Export of data from the runtime using the standard dotnet tracing capabilities instead of a custom hand rolled scheme. +4. Pgo data shall be gathered at a scenario level instead of a per output file level +5. Production of pgo data shall utilize the normal build of the application +6. Pgo data comes in tiers. Not all scenarios will call for capture of all data +7. Any high volume Pgo data shall support sampling, such that the vast majority of applications can continue to execute with instrumentation enabled +8. The proposal shall be designed around the principles of flowing information between various RyuJit operations. Any references to specific forms of data such as block count, or type feedback are not truly topical for this document, but serve as examples of the sort of data that might be transited through from compile to compile. 
+ +## Purpose of PGO +Profile guided optimization in .NET is used to provide benefits for 3 major concerns. + +1. Startup Time +2. Application Size on Disk +3. Application Throughput Performance + +Startup time for an application is primarily improved by avoiding the use of the JIT by ahead of time +compiling methods in the application. In addition, a profile can allow determination of which methods +are hot vs cold, and group methods commonly used together with others. This has been the primary use +of pgo in .NET historically. + +Pgo is used to address size on disk concerns of R2R binaries where the default R2R strategy is too +aggressive and produces binaries that are excessively large. The idea in that case is to only generate +the functions specifically referenced in some profile instead of every method the heuristic indicates +may be interesting. + +Application throughput performance has historically been the primary use of pgo data for C++ compilers. +.NET has history with the use of instrumented per block counts, but this data is not generally processed +in an effective manner by the JIT. This proposal aims to revitalize efforts to make good use of profile +guided data to improve code quality. Over time, it is expected that not only will profile data be used at +build time, but that it will also be used to do runtime profile instrumentation. + +# Proposal Contents +Profile guided optimization is a combination of effort across a swath of components. + +1. Trace data format (for exporting instrumentation data from the runtime through the trace collection tool into the trace processing tool) +2. PGO file format (for storage of pgo data between instrumentation and Aot compilation) +3. ReadyToRun file format (some PGO data is relevant to a Tier 1 compilation such as block counts) + +And there are a series of components that need to be modified: + +1. Instrumenting runtime (coreclr) +2. Instrumenting jit (clrjit) +3. Trace processing tool (dotnet-pgo) +4.
AOT compilation tool (crossgen2) +5. Consuming runtime (coreclr) +6. Diagnostic tools (r2rdump, dotnet-pgo) + +## Conceptual model of `InstrumentationData` +`InstrumentationData` is information used to optimize code in a manner which cannot be determined +statically, and instead is determined through instrumentation of the code. The format of this data +is expected to be defined by the JIT team, and be specific to the probes inserted, and may very well +change over time. It is composed of two sections: + +1. The descriptor used to describe the probes. This is fixed at JIT time, and describes the meaning of the data. +2. The data gathered as counts, and values that will be used to perform further optimization. + +Both of these data blocks are able to contain type and method data, where the concept is that it is +most likely useful to describe methods in the descriptor, and there are known cases such as TypeFeedback +where it would be useful to describe types in the data section (such as for devirtualization and such), +but there are also plausible cases for gathering each kind of data in both sections, so the format will +be made general to support both. Instrumentation Data shall have a version number independent of the +general R2R versioning scheme. The intention is for this form of `InstrumentationData` to become +usable for both out of line instrumentation as described in this document, as well as only tiered +compilation rejit scenarios with in process profiling. + +## Trace data format +Runtime instrumentation will be accomplished through 4 events, 2 of which are already existing. + +1. Which methods are used by a process is exposed through the combination of the `R2REntryPoint` event as well as the `JitStarted` event. These events are in place today, and do not need augmentation. +2. New events to report instrumentation data and counts.
+- `InstrumentationDesc` shall consist of an 8 byte identifier for the instrumentation, a 4 byte instrumentation data format version number, followed by 3 byte arrays. + - The 8 byte identifier has no particular meaning other than to associate an InstrumentationDesc with follow-on InstrumentationData events. + - The first byte array or `DescShape` array contains 2 bits for each 8 byte sized region in the `Desc` array. A bit pattern of `00` indicates simple byte data, a `01` represents a `TypeHandle`, `10` represents a `MethodDesc`, and `11` is reserved for future use. + - The second byte array or `Desc` array describes the instrumentation point. The first 8 byte sized region of the data must be a `MethodDesc` or `TypeHandle`, and follow-on data is used to describe the other data associated with the Instrumentation. For instance, there may be a hash of the IL code so that a future compiler may identify if the IL code has changed from build to build, and there may be a description of the IL ranges associated with offsets into the `InstrumentationData` event. This is a general purpose format. This data must have a length that is aligned to 8 byte boundaries. + - The third byte array `DataShape` describes the shape of the actual instrumentation data. It follows the same rules as the `DescShape` array, but is used to describe the data to be captured in future `InstrumentationData` events. +- `InstrumentationData` shall consist of an 8 byte identifier that matches up with an identifier in the `InstrumentationDesc` as well as a byte array of captured instrumentation data. A new `InstrumentationData` event shall override the data for a previously generated event. This data must have a length that is aligned to 8 byte boundaries. +- Any type or method in data must be emitted via a `BulkType` or `MethodDetails` event before the instrumentation event is emitted.
+ +This format is intended to be a long-lived format which can be extended over time as necessary, but without needing to update the associated tooling and infrastructure. + +## MIBC File format +The PGO file format is designed with the goal of supporting the trace data format efficiently, and supporting + compilation which only needs to read a portion of the file at a time. The proposal is to produce a .NET PE + file and zip it into a container to reduce size on disk. The proposed file extension is .mibc. (Within + the container zip, the internal .NET PE file shall be named `SomeFileName.mibc.dll`.) + +In the PE file there shall be at least one global function, and it shall have a name of `AssemblyDictionary`. This function shall have a series of IL instructions of the form +``` +ldstr mibcGroupName +ldtoken mibcGroupMethod +pop +``` + +Where the presence of a `pop` instruction indicates that the group is complete, and a `ldstr` instruction +begins a group. Each group has a name of the format `<DefiningAssembly>;<OtherAssembly1>;<OtherAssembly2>;` +where there may be 0->Infinite other assemblies. As an example `System.Collections.Concurrent;` would be a +group where the contents are defined in `System.Collections.Concurrent` and where the components are not +defined in multiple assemblies. As a different example, `System.Private.CoreLib;System.Collections.Concurrent;System.Collections.Immutable;` +would be a group used for cases where the object being described is defined in `System.Private.CoreLib`, +but is reliant on definitions from the other assemblies as well. This would be the place to describe methods +of `List<ConcurrentDictionary<int, ImmutableArray<int>>>`. Through this grouping mechanism, a compiler will +be able to ignore the entries in the profile data format which do not relate to the compilation, and thus +avoid wasting excess time parsing portions of the file which are not relevant to the compilation. + +In each `mibcGroupMethod` the data about an individual method/type entry is to be recorded. 
Similarly to the +`AssemblyDictionary` this is an encoding of IL instructions designed to hold the relevant data. In this case +the data is encoded as follows. + +``` +ldtoken methodOrTypeInProfileData +Any series of instructions that does not involve the pop, ldstr "InstrumentationDesc", ldstr "InstrumentationData", or ldstr "InstrumentationEnd" instructions. +ldstr "FunctionTouchOrder" +ldc.i4 <index> +ldstr "InstrumentationDesc" +ldc.i4 <InstrumentationVersionNumber> + A series of ldtoken and ldc.i8 instructions to represent the InstrumentationDesc. Only ldc.i8 instructions + may be used to represent raw bytes, more efficient encodings such as ldc.i4 followed by conv.i8 are not supported +ldstr "InstrumentationData" + A series of ldtoken and ldc.i8 instructions to represent the last InstrumentationData in the trace. Only ldc.i8 + instructions may be used to represent raw bytes, more efficient encodings such as ldc.i4 followed by conv.i8 are not supported +ldstr "InstrumentationEnd" +Any series of instructions that does not involve the pop, ldstr "InstrumentationDesc", ldstr "InstrumentationData", or ldstr "InstrumentationEnd" instructions. +pop +``` + +The `InstrumentationData` is optional, and may not be present in the file. + +The `FunctionTouchOrder` is optional, and may not be present in the file. + +## R2R File Format +Profile data shall be encoded into the R2R file format in a new section named `READYTORUN_SECTION_PROFILEDATA`. +This section shall hold a version number, and a single `NativeHashtable` that contains a mapping from type/method +to the pair of Desc and Data. TODO define how Desc and Data are encoded. The intention is to store exactly the +same data as is stored in the PGO data file, except that the instrumentation data version must be the same for +all data chunks. + +## Instrumenting Runtime +The runtime shall be responsible for choosing when to execute instrumentation, allocating the tracing buffers +and actually reporting the data through the tracing pipeline. 
Tracing shall leverage recent work by Kount in +which low tier code is given a small stub which executes before method execution. At the moment, this stub +increments a call counter, and then invokes either tier 0 code or the prestub. My intention is to tie into this +infrastructure and use it to drive instrumentation behavior. + +One idea on how to do so would be to build 2 copies +of the code, 1 of which is the instrumented version, and the other is the normal code. On some configurable interval, +such as once every 1, 2, 4, 8 or 16 calls, the instrumented version would be called. On exponentially growing intervals, +such as on the 32nd, 64th, 128th, 256th, 512th, etc. call, it would call a function to emit the trace data for +the instrumented method. (To support reporting instrumentation for both the uninstantiated and instantiated +forms of generics, a given method may be associated with multiple instrumentation data payloads.) This sampling +scheme should allow applications to execute with reasonable performance even with instrumentation enabled. +Due to the double JIT compilation, it is expected that application launch times will be slowed, but hopefully +not by too much. Configuration of instrumentation shall be done via environment variable. (In theory this could +be done via the tracing apis, but it is likely a significant security vulnerability, and thus I do not believe +it's appropriate to send such data without positive confirmation that it should be sent.) This concept of a sampling +instrumenting JIT is intended to provide fairly excellent performance while running instrumented, and may be +a reasonable model for moving to a more dynamic system in the future. @AndyAyersMS has pointed out that this approach +has substantial difficulties for providing extremely precise data, but I am of the opinion it would be good enough +to show benefit, and as we develop the profile data further we may choose to adjust this policy, or have another. 
+ +Another policy would be less startup focussed, and instead focus on attempting to gather improvements to steady state +performance. The runtime could use the sampling instrumented scheme as above, but instead of applying the scheme at +initial jit time, it could apply it based on a stack sampling profile of the process, and instrument only the methods found +via a sampling profiler. I expect this approach would be more valuable for performing dynamic recompilation and adaptive +optimization than for static AOT based profile data. + +## Instrumenting JIT +The jit shall be responsible for the format and contents of instrumentation data. When compiled with instrumentation +enabled, it is expected to generate an instrumentation descriptor, and report it across the jitinterface, so that the +runtime may send the InstrumentationDesc event, and allocate the instrumentation region. As an initial implementation, +the data gathered is expected to be simple block count data, but as the data format allows for transmittal of `TypeHandle` +and `MethodDesc` data, it should be possible to add support for capturing information such as virtual function dispatch +type data as well, or other related information. + +In addition the JIT is responsible for parsing the instrumentation data. It shall support an api that allows multiple +instrumentation data chunks to be passed to it, and should support merging/parsing the combination of all the data. + +Finally, once parsed and merged, the JIT when producing an R2R image is responsible for producing a merged copy of pgo +data that may be embedded into the R2R file format for possible consumption by the runtime JIT. + +## Trace processing tool +The trace processing tool is responsible for reading the trace files as produced by perfview/dotnet trace, and +producing .MIBC files. The process should be a straightforward format translation for instrumentation data. 
The +`FunctionTouchOrder` and existence of the method shall be based on the `JitStarted` and `R2REntryPoint` events. + +## AOT Compilation tool +AOT compilation shall use the profile guided data in several ways. +- Any function mentioned in a scenario shall be considered to be warm which shall influence code layout. +- The `FunctionTouchOrder` shall be used as an input into code layout ordering in the final file. + +Processing PGO `InstrumentationData` in the AOT compilation tool shall be a cooperative process between the managed +components of the compiler and the jit. The managed component shall provide to the jit the unpackaged Instrumentation +data for the method being compiled, and for both the uninstantiated method and instantiated method instrumentations +as are present. The jit is responsible for merging these multiple data sources. + +In addition the JIT may optionally choose to generate a profile guided data block for association with the precompiled +code for use in re-jit scenarios, and information about related method code layout for the code, and optionally a +portion of the function body which is to be placed into a cold code section. The intention here is to allow some +algorithm such as Pettis-Hansen or a more modern variant (e.g. https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf) +to be used to optimize code layout. + +## Consuming Runtime/JIT +If present in an R2R file, when a method is rejitted, the runtime shall provide a means for the jit to see instrumentation +data from either previous compiles in process, and/or from the R2R file. This shall provide a means for the JIT to choose +whether or not the method should be recompiled, or possibly to inform it about optimization opportunities that are +too expensive to compute at jit time, but could be computed by the AOT compiler, or other such ideas. + +As a means of doing this, options such as the following will be given to the jit to provide custom behavior. +1. 
Ignore the profile data and rejit. +2. Declare that the prejitted code is good enough as it is. +3. Use the profile data during rejit. + +The intention is that these options will provide an opportunity to improve our tiered compilation story with profile data. +I expect that the set of possibilities here is fairly long. For instance, one other option might be for the tiered compilation +infrastructure to integrate with the jit such that methods are re-jitted in some sort of priority order, another possibility +would be to use this as a means for adaptive or speculative optimization. + +## Diagnostic Tools +The tools r2rdump and dotnet-pgo shall provide a means for dumping their inputs. For most forms of data this is +fairly straightforward, but for `InstrumentationData`, there shall be a common dump tool written in managed code +that can provide a human readable dump of the data. r2rdump, dotnet-pgo, and possibly sos will all be able to share +this codebase for examination of the data structures in r2r files, traces, and runtime environments respectively. diff --git a/src/coreclr/src/tools/dotnet-pgo/dotnet-pgo.csproj b/src/coreclr/src/tools/dotnet-pgo/dotnet-pgo.csproj new file mode 100644 index 0000000000000..4563e17b494a9 --- /dev/null +++ b/src/coreclr/src/tools/dotnet-pgo/dotnet-pgo.csproj @@ -0,0 +1,22 @@ + + + + dotnet-pgo + Exe + $(NetCoreAppCurrent) + netcoreapp3.0 + true + 8.0 + $(BinDir)/dotnet-pgo + true + false + linux-x64;linux-musl-x64;win-x64 + + + + + + + + +