From 8fe767ff7cdde2eb961f022a3d6ea04bb2f765dc Mon Sep 17 00:00:00 2001
From: Bruce Forstall
Date: Wed, 10 Nov 2021 13:28:49 -0800
Subject: [PATCH] Create superpmi-asmdiffs pipeline (#61194)

Create a new `runtime-coreclr superpmi-asmdiffs` pipeline that runs SuperPMI asmdiffs for every change in the JIT directory. The diffs are run on two platforms: Windows x64 and Windows x86. Diffs for the Linux, Arm64, and Arm32 targets are generated using cross-targeting compilers, as follows:

| Platform | Asm diffs |
| -- | -- |
| Windows x64 | win-x64, win-arm64, linux-x64, linux-arm64 |
| Windows x86 | win-x86, linux-arm |

The resulting summary .md files are uploaded to the pipeline artifacts, one .md file per platform (so, one for the Windows x64 runs and one for the Windows x86 runs). The results are also displayed on the "Extensions" page of the AzDO pipeline. The runs take about 50 minutes to complete (assuming little waiting for machines).

The asm diffs pipeline is similar to the "superpmi-replay" pipeline, except:

1. It determines an appropriate baseline JIT based on the PR commit and where it merges with the `main` branch. Given this, it downloads the matching baseline JITs from the JIT rolling build artifacts in Azure Storage.
2. It clones the `jitutils` repo and builds the `jit-analyze` tool, which is needed to generate the summary .md file.
3. It downloads and adds to the Helix machine payload a "portable" `git` installation, since `git diff` is used by `jit-analyze` to analyze the generated .dasm files of the diff.
4. It collects the various summary.md files into one per platform on which the runs are done, and publishes that to the artifacts and the `Extensions` page.
5. It does only one replay (asmdiffs) run, not one for each of a set of multiple stress modes.

As part of this implementation:

a. The `azdo_pipelines_util.py` script was renamed to `jitutil.py`, and a number of utility functions from superpmi.py were moved into it. This was mostly to share the code for downloading and uncompressing .zip files. (There is a slight change to the output of the `superpmi.py download` command as a result.) However, I also moved a number of simple, more general helpers, for possible future sharing.
b. `jitrollingbuild.py download` can now take no arguments, and will download a baseline JIT (from the JIT rolling build Azure Storage location) for the current enlistment to the default location. Previously, it required a specific git_hash and target directory. There is similar logic in superpmi.py, but not quite the same.
c. The `superpmi.py --no_progress` option was made global, and applied in a few more places. This was necessary because `superpmi.py asmdiffs` will download a coredistools binary from the JIT Azure Storage if one isn't found in the Core_Root folder.
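For reference, the baseline-selection logic in item 1 reduces to a handful of `git` queries. The sketch below mirrors the `process_git_hash_arg` function added to `jitrollingbuild.py` in this change, but leaves out the error handling and the step that probes the rolling build storage for each candidate hash; the function name here is illustrative only.

```python
import subprocess

def find_baseline_candidates(repo_root, max_candidates=20):
    """Return candidate baseline hashes: commits that touched src/coreclr/jit,
    walking back from the merge-base of HEAD and origin/main, newest first."""
    def git(*args):
        return subprocess.check_output(["git"] + list(args), cwd=repo_root).decode("utf-8").strip()

    current_hash = git("rev-parse", "HEAD")
    base_hash = git("merge-base", current_hash, "origin/main")
    # Only commits that changed the JIT have rolling-build JITs available to download.
    log = git("log", "--pretty=format:%H", base_hash,
              "-" + str(max_candidates), "--", "src/coreclr/jit/*")
    return log.splitlines()
```

Each candidate hash is then checked against the rolling build container, newest first, and the first one with a matching build is used as the baseline.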
Fixes https://github.com/dotnet/runtime/issues/59445 --- eng/pipelines/coreclr/superpmi-asmdiffs.yml | 29 + .../templates/run-superpmi-asmdiffs-job.yml | 137 ++++ .../templates/run-superpmi-collect-job.yml | 2 +- .../templates/run-superpmi-replay-job.yml | 7 +- .../templates/superpmi-asmdiffs-job.yml | 39 + src/coreclr/scripts/antigen_run.py | 2 +- src/coreclr/scripts/azdo_pipelines_util.py | 178 ----- src/coreclr/scripts/fuzzer_setup.py | 8 +- src/coreclr/scripts/fuzzlyn_run.py | 2 +- src/coreclr/scripts/jitrollingbuild.py | 177 ++++- src/coreclr/scripts/jitutil.py | 666 ++++++++++++++++++ src/coreclr/scripts/superpmi-asmdiffs.proj | 73 ++ src/coreclr/scripts/superpmi.py | 520 +------------- src/coreclr/scripts/superpmi_asmdiffs.py | 193 +++++ .../scripts/superpmi_asmdiffs_setup.py | 255 +++++++ .../scripts/superpmi_asmdiffs_summarize.py | 176 +++++ src/coreclr/scripts/superpmi_aspnet.py | 2 +- src/coreclr/scripts/superpmi_benchmarks.py | 2 +- src/coreclr/scripts/superpmi_collect_setup.py | 8 +- src/coreclr/scripts/superpmi_replay.py | 2 +- src/coreclr/scripts/superpmi_replay_setup.py | 6 +- 21 files changed, 1770 insertions(+), 714 deletions(-) create mode 100644 eng/pipelines/coreclr/superpmi-asmdiffs.yml create mode 100644 eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-job.yml create mode 100644 eng/pipelines/coreclr/templates/superpmi-asmdiffs-job.yml delete mode 100644 src/coreclr/scripts/azdo_pipelines_util.py create mode 100644 src/coreclr/scripts/jitutil.py create mode 100644 src/coreclr/scripts/superpmi-asmdiffs.proj create mode 100644 src/coreclr/scripts/superpmi_asmdiffs.py create mode 100644 src/coreclr/scripts/superpmi_asmdiffs_setup.py create mode 100644 src/coreclr/scripts/superpmi_asmdiffs_summarize.py diff --git a/eng/pipelines/coreclr/superpmi-asmdiffs.yml b/eng/pipelines/coreclr/superpmi-asmdiffs.yml new file mode 100644 index 0000000000000..bd41ac03d85e6 --- /dev/null +++ b/eng/pipelines/coreclr/superpmi-asmdiffs.yml @@ -0,0 +1,29 @@ +pr: + branches: + include: + - main + paths: + include: + - src/coreclr/jit/* + +jobs: + +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/build-jit-job.yml + buildConfig: checked + platforms: + - windows_x64 + - windows_x86 + jobParameters: + uploadAs: 'pipelineArtifacts' + +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/superpmi-asmdiffs-job.yml + buildConfig: checked + platforms: + - windows_x64 + - windows_x86 + helixQueueGroup: ci + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml \ No newline at end of file diff --git a/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-job.yml b/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-job.yml new file mode 100644 index 0000000000000..46535d20e8680 --- /dev/null +++ b/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-job.yml @@ -0,0 +1,137 @@ +parameters: + steps: [] # optional -- any additional steps that need to happen before pulling down the jitutils repo and sending the jitutils to helix (ie building your repo) + variables: [] # optional -- list of additional variables to send to the template + jobName: '' # required -- job name + displayName: '' # optional -- display name for the job. 
Will use jobName if not passed + pool: '' # required -- name of the Build pool + container: '' # required -- name of the container + buildConfig: '' # required -- build configuration + archType: '' # required -- targeting CPU architecture + osGroup: '' # required -- operating system for the job + osSubgroup: '' # optional -- operating system subgroup + continueOnError: 'false' # optional -- determines whether to continue the build if the step errors + dependsOn: '' # optional -- dependencies of the job + timeoutInMinutes: 120 # optional -- timeout for the job + enableTelemetry: false # optional -- enable for telemetry + liveLibrariesBuildConfig: '' # optional -- live-live libraries configuration to use for the run + helixQueues: '' # required -- Helix queues + dependOnEvaluatePaths: false + +jobs: +- template: xplat-pipeline-job.yml + parameters: + dependsOn: ${{ parameters.dependsOn }} + buildConfig: ${{ parameters.buildConfig }} + archType: ${{ parameters.archType }} + osGroup: ${{ parameters.osGroup }} + osSubgroup: ${{ parameters.osSubgroup }} + liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }} + enableTelemetry: ${{ parameters.enableTelemetry }} + enablePublishBuildArtifacts: true + continueOnError: ${{ parameters.continueOnError }} + dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }} + timeoutInMinutes: ${{ parameters.timeoutInMinutes }} + + ${{ if ne(parameters.displayName, '') }}: + displayName: '${{ parameters.displayName }}' + ${{ if eq(parameters.displayName, '') }}: + displayName: '${{ parameters.jobName }}' + + variables: + + - ${{ each variable in parameters.variables }}: + - ${{ if ne(variable.name, '') }}: + - name: ${{ variable.name }} + value: ${{ variable.value }} + - ${{ if ne(variable.group, '') }}: + - group: ${{ variable.group }} + + - name: PythonScript + value: 'py -3' + - name: PipScript + value: 'py -3 -m pip' + - name: SpmiCollectionLocation + value: '$(Build.SourcesDirectory)\artifacts\spmi\' + - name: SpmiLogsLocation + value: '$(Build.SourcesDirectory)\artifacts\spmi_logs\' + - name: SpmiAsmdiffsLocation + value: '$(Build.SourcesDirectory)\artifacts\spmi_asmdiffs\' + - name: HelixResultLocation + value: '$(Build.SourcesDirectory)\artifacts\helixresults\' + + workspace: + clean: all + pool: + ${{ parameters.pool }} + container: ${{ parameters.container }} + steps: + - ${{ parameters.steps }} + + - script: | + mkdir $(SpmiCollectionLocation) + mkdir $(SpmiLogsLocation) + mkdir $(SpmiAsmdiffsLocation) + displayName: Create directories + + - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_asmdiffs_setup.py -source_directory $(Build.SourcesDirectory) -product_directory $(buildProductRootFolderPath) -arch $(archType) + displayName: ${{ format('SuperPMI asmdiffs setup ({0})', parameters.archType) }} + + # Run superpmi asmdiffs in helix + - template: /eng/pipelines/common/templates/runtimes/send-to-helix-step.yml + parameters: + displayName: 'Send job to Helix' + helixBuild: $(Build.BuildNumber) + helixSource: $(_HelixSource) + helixType: 'build/tests/' + helixQueues: ${{ join(',', parameters.helixQueues) }} + creator: dotnet-bot + WorkItemTimeout: 2:00 # 2 hours + WorkItemDirectory: '$(WorkItemDirectory)' + CorrelationPayloadDirectory: '$(CorrelationPayloadDirectory)' + helixProjectArguments: '$(Build.SourcesDirectory)/src/coreclr/scripts/superpmi-asmdiffs.proj' + BuildConfig: ${{ parameters.buildConfig }} + osGroup: ${{ parameters.osGroup }} + archType: ${{ parameters.archType }} + shouldContinueOnError: 
true # Run the future step i.e. upload superpmi logs + + # Always upload the available logs for diagnostics + - task: CopyFiles@2 + displayName: Copying superpmi.log of all partitions + inputs: + sourceFolder: '$(HelixResultLocation)' + contents: '**/superpmi_*.log' + targetFolder: '$(SpmiLogsLocation)' + condition: always() + + - task: CopyFiles@2 + displayName: Copying superpmi.md of all partitions + inputs: + sourceFolder: '$(HelixResultLocation)' + contents: '**/superpmi_*.md' + targetFolder: '$(SpmiAsmdiffsLocation)' + condition: always() + + - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_asmdiffs_summarize.py -diff_summary_dir $(SpmiAsmdiffsLocation) -arch $(archType) + displayName: ${{ format('Summarize ({0})', parameters.archType) }} + condition: always() + + - task: PublishPipelineArtifact@1 + displayName: Publish SuperPMI logs + inputs: + targetPath: $(SpmiLogsLocation) + artifactName: 'SuperPMI_Logs_$(archType)_$(buildConfig)' + condition: always() + + - task: PublishPipelineArtifact@1 + displayName: Publish SuperPMI asmdiffs files + inputs: + targetPath: $(SpmiAsmdiffsLocation) + artifactName: 'SuperPMI_Asmdiffs_$(archType)_$(buildConfig)' + condition: always() + + - task: PublishPipelineArtifact@1 + displayName: Publish SuperPMI build logs + inputs: + targetPath: $(Build.SourcesDirectory)/artifacts/log + artifactName: 'SuperPMI_BuildLogs_$(archType)_$(buildConfig)' + condition: always() diff --git a/eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml b/eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml index 01726c6b1a339..1ac235f6f6d08 100644 --- a/eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml +++ b/eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml @@ -114,7 +114,7 @@ jobs: - script: | mkdir -p $(MergedMchFileLocation) mkdir -p $(SpmiLogsLocation) - displayName: Create directory for merged collection + displayName: Create directories - ${{ if eq(parameters.osGroup, 'windows') }}: - script: | mkdir $(MergedMchFileLocation) diff --git a/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml b/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml index 696b2e5e043bc..9b64c380d2605 100644 --- a/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml +++ b/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml @@ -66,11 +66,12 @@ jobs: - ${{ parameters.steps }} - script: | - mkdir -p $(SpmiCollectionLocation) - displayName: Create directory for SPMI collection + mkdir $(SpmiCollectionLocation) + mkdir $(SpmiLogsLocation) + displayName: Create directories - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_replay_setup.py -source_directory $(Build.SourcesDirectory) -product_directory $(buildProductRootFolderPath) -arch $(archType) - displayName: ${{ format('SuperPMI replay setup ({0} {1})', parameters.osGroup, parameters.archType) }} + displayName: ${{ format('SuperPMI replay setup ({0})', parameters.archType) }} # Run superpmi replay in helix - template: /eng/pipelines/common/templates/runtimes/send-to-helix-step.yml diff --git a/eng/pipelines/coreclr/templates/superpmi-asmdiffs-job.yml b/eng/pipelines/coreclr/templates/superpmi-asmdiffs-job.yml new file mode 100644 index 0000000000000..bb05902efe969 --- /dev/null +++ b/eng/pipelines/coreclr/templates/superpmi-asmdiffs-job.yml @@ -0,0 +1,39 @@ +parameters: + buildConfig: '' # required -- build configuration + archType: '' # required -- targeting CPU architecture + osGroup: '' # required -- operating 
system for the job + osSubgroup: '' # optional -- operating system subgroup + pool: '' + timeoutInMinutes: 120 # build timeout + variables: {} + helixQueues: '' + dependOnEvaluatePaths: false + runJobTemplate: '/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-job.yml' + +jobs: +- template: ${{ parameters.runJobTemplate }} + parameters: + jobName: ${{ format('superpmi_asmdiffs_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} + displayName: ${{ format('SuperPMI asmdiffs {0}{1} {2} {3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} + pool: ${{ parameters.pool }} + buildConfig: ${{ parameters.buildConfig }} + archType: ${{ parameters.archType }} + osGroup: ${{ parameters.osGroup }} + osSubgroup: ${{ parameters.osSubgroup }} + dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }} + timeoutInMinutes: ${{ parameters.timeoutInMinutes }} + helixQueues: ${{ parameters.helixQueues }} + dependsOn: + - ${{ format('coreclr_jit_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} + + variables: ${{ parameters.variables }} + + steps: + + # Download jit builds + - template: /eng/pipelines/common/download-artifact-step.yml + parameters: + unpackFolder: $(buildProductRootFolderPath) + artifactFileName: '$(buildProductArtifactName)$(archiveExtension)' + artifactName: '$(buildProductArtifactName)' + displayName: 'JIT product build' \ No newline at end of file diff --git a/src/coreclr/scripts/antigen_run.py b/src/coreclr/scripts/antigen_run.py index 96146005906d9..240e1353f75b8 100644 --- a/src/coreclr/scripts/antigen_run.py +++ b/src/coreclr/scripts/antigen_run.py @@ -20,7 +20,7 @@ from os.path import getsize import os from coreclr_arguments import * -from azdo_pipelines_util import run_command, TempDir +from jitutil import run_command, TempDir parser = argparse.ArgumentParser(description="description") diff --git a/src/coreclr/scripts/azdo_pipelines_util.py b/src/coreclr/scripts/azdo_pipelines_util.py deleted file mode 100644 index 83f1d083ee6ad..0000000000000 --- a/src/coreclr/scripts/azdo_pipelines_util.py +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env python3 -# -# Licensed to the .NET Foundation under one or more agreements. -# The .NET Foundation licenses this file to you under the MIT license. -# -# Title : azdo_pipelines_util.py -# -# Notes: -# -# Utility functions used by Python scripts involved with Azure DevOps Pipelines -# setup. -# -################################################################################ -################################################################################ - -import os -import shutil -import subprocess -import sys -import tempfile - - -def run_command(command_to_run, _cwd=None, _exit_on_fail=False, _output_file=None): - """ Runs the command. - - Args: - command_to_run ([string]): Command to run along with arguments. - _cwd (string): Current working directory. - _exit_on_fail (bool): If it should exit on failure. - Returns: - (string, string, int): Returns a tuple of stdout, stderr, and command return code if _output_file= None - Otherwise stdout, stderr are empty. 
- """ - print("Running: " + " ".join(command_to_run)) - command_stdout = "" - command_stderr = "" - return_code = 1 - - output_type = subprocess.STDOUT if _output_file else subprocess.PIPE - with subprocess.Popen(command_to_run, stdout=subprocess.PIPE, stderr=output_type, cwd=_cwd) as proc: - - # For long running command, continuously print the output - if _output_file: - while True: - with open(_output_file, 'a') as of: - output = proc.stdout.readline() - if proc.poll() is not None: - break - if output: - output_str = output.strip().decode("utf-8") - print(output_str) - of.write(output_str + "\n") - else: - command_stdout, command_stderr = proc.communicate() - if len(command_stdout) > 0: - print(command_stdout.decode("utf-8")) - if len(command_stderr) > 0: - print(command_stderr.decode("utf-8")) - - return_code = proc.returncode - if _exit_on_fail and return_code != 0: - print("Command failed. Exiting.") - sys.exit(1) - return command_stdout, command_stderr, return_code - - -def copy_directory(src_path, dst_path, verbose_output=True, match_func=lambda path: True): - """Copies directory in 'src_path' to 'dst_path' maintaining the directory - structure. https://docs.python.org/3.5/library/shutil.html#shutil.copytree can't - be used in this case because it expects the destination directory should not - exist, however we do call copy_directory() to copy files to same destination directory. - - Args: - src_path (string): Path of source directory that need to be copied. - dst_path (string): Path where directory should be copied. - verbose_output (bool): True to print every copy or skipped file. - match_func (str -> bool) : Criteria function determining if a file is copied. - """ - if not os.path.exists(dst_path): - os.makedirs(dst_path) - for item in os.listdir(src_path): - src_item = os.path.join(src_path, item) - dst_item = os.path.join(dst_path, item) - if os.path.isdir(src_item): - copy_directory(src_item, dst_item, verbose_output, match_func) - else: - try: - if match_func(src_item): - if verbose_output: - print("> copy {0} => {1}".format(src_item, dst_item)) - try: - shutil.copy2(src_item, dst_item) - except PermissionError as pe_error: - print('Ignoring PermissionError: {0}'.format(pe_error)) - else: - if verbose_output: - print("> skipping {0}".format(src_item)) - except UnicodeEncodeError: - if verbose_output: - print("> Got UnicodeEncodeError") - - -def copy_files(src_path, dst_path, file_names): - """Copy files from 'file_names' list from 'src_path' to 'dst_path'. - It retains the original directory structure of src_path. - - Args: - src_path (string): Source directory from where files are copied. - dst_path (string): Destination directory where files to be copied. - file_names ([string]): List of full path file names to be copied. - """ - - print('### Copying below files from {0} to {1}:'.format(src_path, dst_path)) - print('') - print(os.linesep.join(file_names)) - for f in file_names: - # Create same structure in dst so we don't clobber same files names present in different directories - dst_path_of_file = f.replace(src_path, dst_path) - - dst_directory = os.path.dirname(dst_path_of_file) - if not os.path.exists(dst_directory): - os.makedirs(dst_directory) - try: - shutil.copy2(f, dst_path_of_file) - except PermissionError as pe_error: - print('Ignoring PermissionError: {0}'.format(pe_error)) - - -def set_pipeline_variable(name, value): - """ This method sets pipeline variable. - - Args: - name (string): Name of the variable. - value (string): Value of the variable. 
- """ - define_variable_format = "##vso[task.setvariable variable={0}]{1}" - print("{0} -> {1}".format(name, value)) # logging - print(define_variable_format.format(name, value)) # set variable - - -class TempDir: - """ Class to create a temporary working directory, or use one that is passed as an argument. - - Use with: "with TempDir() as temp_dir" to change to that directory and then automatically - change back to the original working directory afterwards and remove the temporary - directory and its contents (if skip_cleanup is False). - """ - - def __init__(self, path=None, skip_cleanup=False): - self.mydir = tempfile.mkdtemp() if path is None else path - self.cwd = None - self._skip_cleanup = skip_cleanup - - def __enter__(self): - self.cwd = os.getcwd() - os.chdir(self.mydir) - return self.mydir - - def __exit__(self, exc_type, exc_val, exc_tb): - os.chdir(self.cwd) - if not self._skip_cleanup: - shutil.rmtree(self.mydir) - - -class ChangeDir: - """ Class to temporarily change to a given directory. Use with "with". - """ - - def __init__(self, mydir): - self.mydir = mydir - self.cwd = None - - def __enter__(self): - self.cwd = os.getcwd() - os.chdir(self.mydir) - - def __exit__(self, exc_type, exc_val, exc_tb): - os.chdir(self.cwd) \ No newline at end of file diff --git a/src/coreclr/scripts/fuzzer_setup.py b/src/coreclr/scripts/fuzzer_setup.py index 54ce2f37734fc..7bc60acc71c4f 100644 --- a/src/coreclr/scripts/fuzzer_setup.py +++ b/src/coreclr/scripts/fuzzer_setup.py @@ -18,7 +18,7 @@ import os from coreclr_arguments import * from os import path -from azdo_pipelines_util import run_command, copy_directory, set_pipeline_variable, ChangeDir, TempDir +from jitutil import run_command, copy_directory, set_pipeline_variable, ChangeDir, TempDir parser = argparse.ArgumentParser(description="description") @@ -103,7 +103,7 @@ def main(main_args): # create exploratory directory print('Copying {} -> {}'.format(scripts_src_directory, coreroot_directory)) - copy_directory(scripts_src_directory, coreroot_directory, match_func=lambda path: any(path.endswith(extension) for extension in [".py"])) + copy_directory(scripts_src_directory, coreroot_directory, verbose_output=True, match_func=lambda path: any(path.endswith(extension) for extension in [".py"])) if is_windows: acceptable_copy = lambda path: any(path.endswith(extension) for extension in [".py", ".dll", ".exe", ".json"]) @@ -113,7 +113,7 @@ def main(main_args): # copy CORE_ROOT print('Copying {} -> {}'.format(coreclr_args.core_root_directory, coreroot_directory)) - copy_directory(coreclr_args.core_root_directory, coreroot_directory, match_func=acceptable_copy) + copy_directory(coreclr_args.core_root_directory, coreroot_directory, verbose_output=True, match_func=acceptable_copy) try: with TempDir() as tool_code_directory: @@ -136,7 +136,7 @@ def main(main_args): # copy tool print('Copying {} -> {}'.format(publish_dir, dst_directory)) - copy_directory(publish_dir, dst_directory, match_func=acceptable_copy) + copy_directory(publish_dir, dst_directory, verbose_output=True, match_func=acceptable_copy) except PermissionError as pe: print("Skipping file. 
Got error: %s", pe) diff --git a/src/coreclr/scripts/fuzzlyn_run.py b/src/coreclr/scripts/fuzzlyn_run.py index 7d99b1f504fff..fa37b6eb4f655 100644 --- a/src/coreclr/scripts/fuzzlyn_run.py +++ b/src/coreclr/scripts/fuzzlyn_run.py @@ -20,7 +20,7 @@ import re import shutil import threading -from azdo_pipelines_util import run_command, TempDir +from jitutil import run_command, TempDir from coreclr_arguments import * from os import path diff --git a/src/coreclr/scripts/jitrollingbuild.py b/src/coreclr/scripts/jitrollingbuild.py index 0003805b49960..d120fa49d27ca 100644 --- a/src/coreclr/scripts/jitrollingbuild.py +++ b/src/coreclr/scripts/jitrollingbuild.py @@ -32,10 +32,10 @@ ################################################################################ az_account_name = "clrjit2" -az_container_name = "jitrollingbuild" +az_jitrollingbuild_container_name = "jitrollingbuild" az_builds_root_folder = "builds" az_blob_storage_account_uri = "https://" + az_account_name + ".blob.core.windows.net/" -az_blob_storage_container_uri = az_blob_storage_account_uri + az_container_name +az_blob_storage_jitrollingbuild_container_uri = az_blob_storage_account_uri + az_jitrollingbuild_container_name ################################################################################ # Argument Parser @@ -50,19 +50,28 @@ """ download_description = """\ -Download clrjit from SuperPMI Azure storage. +Download clrjit from SuperPMI Azure storage. If -git_hash is given, download exactly +that JIT. If -git_hash is not given, find the latest git hash from the main branch that +corresponds to the current tree, and download that JIT. That is, find an appropriate +"baseline" JIT for doing asm diffs. """ list_description = """\ List clrjit in SuperPMI Azure storage. """ -host_os_help = "OS (windows, OSX, Linux). Default: current OS." - arch_help = "Architecture (x64, x86, arm, arm64). Default: current architecture." build_type_help = "Build type (Debug, Checked, Release). Default: Checked." +host_os_help = "OS (windows, OSX, Linux). Default: current OS." + +spmi_location_help = """\ +Directory in which to put SuperPMI files, such as downloaded MCH files, asm diffs, and repro .MC files. +Optional. Default is 'spmi' within the repo 'artifacts' directory. +If 'SUPERPMI_CACHE_DIRECTORY' environment variable is set to a path, it will use that directory. +""" + git_hash_help = "git hash" target_dir_help = "Directory to put the downloaded JIT." 
@@ -82,6 +91,7 @@ common_parser.add_argument("-arch", help=arch_help) common_parser.add_argument("-build_type", default="Checked", help=build_type_help) common_parser.add_argument("-host_os", help=host_os_help) +common_parser.add_argument("-spmi_location", help=spmi_location_help) # subparser for upload upload_parser = subparsers.add_parser("upload", description=upload_description, parents=[common_parser]) @@ -93,8 +103,8 @@ # subparser for download download_parser = subparsers.add_parser("download", description=download_description, parents=[common_parser]) -download_parser.add_argument("-git_hash", required=True, help=git_hash_help) -download_parser.add_argument("-target_dir", required=True, help=target_dir_help) +download_parser.add_argument("-git_hash", help=git_hash_help) +download_parser.add_argument("-target_dir", help=target_dir_help) download_parser.add_argument("--skip_cleanup", action="store_true", help=skip_cleanup_help) # subparser for list @@ -172,6 +182,111 @@ def determine_jit_name(coreclr_args): raise RuntimeError("Unknown OS.") +def process_git_hash_arg(coreclr_args): + """ Process the -git_hash argument. + + If the argument is present, use that to download a JIT. + If not present, try to find and download a JIT based on the current environment: + 1. Determine the current directory git hash using: + git rev-parse HEAD + Call the result `current_git_hash`. + 2. Determine the baseline: where does this hash meet `main` using: + git merge-base `current_git_hash` main + Call the result `base_git_hash`. + 3. Figure out the latest hash, starting with `base_git_hash`, that contains any changes to + the src/coreclr/jit directory. (We do this because the JIT rolling build only includes + builds for changes to this directory. So, this logic needs to stay in sync with the logic + that determines what causes the JIT rolling build to run. E.g., it should also get + rebuilt if the JIT-EE interface GUID changes. Alternatively, we can take the entire list + of changes, and probe the rolling build drop for all of them.) + 4. Starting with `base_git_hash`, and possibly walking to older changes, look for matching builds + in the JIT rolling build drops. + 5. If a JIT directory in Azure Storage is found, set coreclr_args.git_hash to that git hash to use + for downloading. + + Args: + coreclr_args (CoreclrArguments) : parsed args + + Returns: + Nothing + + coreclr_args.git_hash is set to the git hash to use + + An exception is thrown if the `-git_hash` argument is unspecified, and we don't find an appropriate + JIT to download. + """ + + if coreclr_args.git_hash is not None: + return + + # Do all the remaining commands, including a number of 'git' commands including relative paths, + # from the root of the runtime repo. + + with ChangeDir(coreclr_args.runtime_repo_location): + command = [ "git", "rev-parse", "HEAD" ] + print("Invoking: {}".format(" ".join(command))) + proc = subprocess.Popen(command, stdout=subprocess.PIPE) + stdout_git_rev_parse, _ = proc.communicate() + return_code = proc.returncode + if return_code == 0: + current_git_hash = stdout_git_rev_parse.decode('utf-8').strip() + print("Current hash: {}".format(current_git_hash)) + else: + raise RuntimeError("Couldn't determine current git hash") + + # We've got the current hash; figure out the baseline hash. 
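+        # Note: 'origin/main' is used as the upstream main branch; the baseline is the point
+        # where the current commit's history meets it.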
+ command = [ "git", "merge-base", current_git_hash, "origin/main" ] + print("Invoking: {}".format(" ".join(command))) + proc = subprocess.Popen(command, stdout=subprocess.PIPE) + stdout_git_merge_base, _ = proc.communicate() + return_code = proc.returncode + if return_code == 0: + base_git_hash = stdout_git_merge_base.decode('utf-8').strip() + print("Baseline hash: {}".format(base_git_hash)) + else: + raise RuntimeError("Couldn't determine baseline git hash") + + # Enumerate the last 20 changes, starting with the baseline, that included JIT changes. + command = [ "git", "log", "--pretty=format:%H", base_git_hash, "-20", "--", "src/coreclr/jit/*" ] + print("Invoking: {}".format(" ".join(command))) + proc = subprocess.Popen(command, stdout=subprocess.PIPE) + stdout_change_list, _ = proc.communicate() + return_code = proc.returncode + change_list_hashes = [] + if return_code == 0: + change_list_hashes = stdout_change_list.decode('utf-8').strip().splitlines() + else: + raise RuntimeError("Couldn't determine list of JIT changes starting with baseline hash") + + if len(change_list_hashes) == 0: + raise RuntimeError("No JIT changes found starting with baseline hash") + + # For each hash, see if the rolling build contains the JIT. + + hashnum = 1 + for git_hash in change_list_hashes: + print("try {}: {}".format(hashnum, git_hash)) + + # Set the git hash to look for + # Note: there's a slight inefficiency here because this code searches for a JIT at this hash value, and + # then when we go to download, we do the same search again because we don't cache the result and pass it + # directly on to the downloader. + coreclr_args.git_hash = git_hash + urls = get_jit_urls(coreclr_args, find_all=False) + if len(urls) > 1: + if hashnum > 1: + print("Warning: the baseline found is not built with the first git hash with JIT code changes; there may be extraneous diffs") + return + + # We didn't find a baseline; keep looking + hashnum += 1 + + # We ran out of hashes of JIT changes, and didn't find a baseline. Give up. + print("Error: no baseline JIT found") + + raise RuntimeError("No baseline JIT found") + + def list_az_jits(filter_func=lambda unused: True, prefix_string = None): """ List the JITs in Azure Storage using REST api @@ -198,7 +313,7 @@ def list_az_jits(filter_func=lambda unused: True, prefix_string = None): urls = [] - list_az_container_uri_root = az_blob_storage_container_uri + "?restype=container&comp=list&prefix=" + az_builds_root_folder + "/" + list_az_container_uri_root = az_blob_storage_jitrollingbuild_container_uri + "?restype=container&comp=list&prefix=" + az_builds_root_folder + "/" if prefix_string: list_az_container_uri_root += prefix_string @@ -268,7 +383,7 @@ def upload_command(coreclr_args): print("JIT upload") def upload_blob(file, blob_name): - blob_client = blob_service_client.get_blob_client(container=az_container_name, blob=blob_name) + blob_client = blob_service_client.get_blob_client(container=az_jitrollingbuild_container_name, blob=blob_name) # Check if the blob already exists, and delete it if it does, before uploading / replacing it. 
try: @@ -382,14 +497,14 @@ def upload_blob(file, blob_name): total_bytes_uploaded += zip_stat_result.st_size blob_name = "{}/{}".format(blob_folder_name, zip_name) - print("Uploading: {} ({}) -> {}".format(file, zip_path, az_blob_storage_container_uri + "/" + blob_name)) + print("Uploading: {} ({}) -> {}".format(file, zip_path, az_blob_storage_jitrollingbuild_container_uri + "/" + blob_name)) upload_blob(zip_path, blob_name) else: file_stat_result = os.stat(file) total_bytes_uploaded += file_stat_result.st_size file_name = os.path.basename(file) blob_name = "{}/{}".format(blob_folder_name, file_name) - print("Uploading: {} -> {}".format(file, az_blob_storage_container_uri + "/" + blob_name)) + print("Uploading: {} -> {}".format(file, az_blob_storage_jitrollingbuild_container_uri + "/" + blob_name)) upload_blob(file, blob_name) print("Uploaded {:n} bytes".format(total_bytes_uploaded)) @@ -466,7 +581,7 @@ def get_jit_urls(coreclr_args, find_all=False): """ blob_filter_string = "{}/{}/{}/{}".format(coreclr_args.git_hash, coreclr_args.host_os, coreclr_args.arch, coreclr_args.build_type) - blob_prefix_filter = "{}/{}/{}".format(az_blob_storage_container_uri, az_builds_root_folder, blob_filter_string).lower() + blob_prefix_filter = "{}/{}/{}".format(az_blob_storage_jitrollingbuild_container_uri, az_builds_root_folder, blob_filter_string).lower() # Determine if a URL in Azure Storage should be allowed. The URL looks like: # https://clrjit.blob.core.windows.net/jitrollingbuild/builds/git_hash/Linux/x64/Checked/clrjit.dll @@ -480,17 +595,27 @@ def filter_jits(url): def download_command(coreclr_args): - """ Download the JIT + """ Download the JITs Args: coreclr_args (CoreclrArguments): parsed args """ urls = get_jit_urls(coreclr_args, find_all=False) - if urls is None: + if len(urls) == 0: + print("Nothing to download") return - download_urls(urls, coreclr_args.target_dir) + if coreclr_args.target_dir is None: + # Use the same default download location for the JIT as superpmi.py uses for downloading a baseline JIT. 
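+            # For example (illustrative values), with the default spmi_location on Windows the JIT
+            # is downloaded to: <repo_root>\artifacts\spmi\basejit\<git_hash>.windows.x64.Checked\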
+ default_basejit_root_dir = os.path.join(coreclr_args.spmi_location, "basejit") + target_dir = os.path.join(default_basejit_root_dir, "{}.{}.{}.{}".format(coreclr_args.git_hash, coreclr_args.host_os, coreclr_args.arch, coreclr_args.build_type)) + if not os.path.isdir(target_dir): + os.makedirs(target_dir) + else: + target_dir = coreclr_args.target_dir + + download_urls(urls, target_dir) def list_command(coreclr_args): @@ -501,7 +626,8 @@ def list_command(coreclr_args): """ urls = get_jit_urls(coreclr_args, find_all=coreclr_args.all) - if urls is None: + if len(urls) == 0: + print("No JITs found") return count = len(urls) @@ -536,6 +662,21 @@ def setup_args(args): lambda unused: True, "Unable to set mode") + def setup_spmi_location_arg(spmi_location): + if spmi_location is None: + if "SUPERPMI_CACHE_DIRECTORY" in os.environ: + spmi_location = os.environ["SUPERPMI_CACHE_DIRECTORY"] + spmi_location = os.path.abspath(spmi_location) + else: + spmi_location = os.path.abspath(os.path.join(coreclr_args.artifacts_location, "spmi")) + return spmi_location + + coreclr_args.verify(args, + "spmi_location", + lambda unused: True, + "Unable to set spmi_location", + modify_arg=setup_spmi_location_arg) + if coreclr_args.mode == "upload": coreclr_args.verify(args, @@ -575,10 +716,12 @@ def setup_args(args): lambda unused: True, "Unable to set skip_cleanup") - if not os.path.isdir(coreclr_args.target_dir): + if coreclr_args.target_dir is not None and not os.path.isdir(coreclr_args.target_dir): print("--target_dir directory does not exist") raise RuntimeError("Error") + process_git_hash_arg(coreclr_args) + elif coreclr_args.mode == "list": coreclr_args.verify(args, diff --git a/src/coreclr/scripts/jitutil.py b/src/coreclr/scripts/jitutil.py new file mode 100644 index 0000000000000..5bff7e304594b --- /dev/null +++ b/src/coreclr/scripts/jitutil.py @@ -0,0 +1,666 @@ +#!/usr/bin/env python3 +# +# Licensed to the .NET Foundation under one or more agreements. +# The .NET Foundation licenses this file to you under the MIT license. +# +# Title : jitutil.py +# +# Notes: +# +# Utility functions used by Python scripts used by the CLR JIT team. +# +################################################################################ +################################################################################ + +import os +import shutil +import subprocess +import sys +import tempfile +import logging +import urllib +import urllib.request +import zipfile + +################################################################################ +## +## Helper classes +## +################################################################################ + +class TempDir: + """ Class to create a temporary working directory, or use one that is passed as an argument. + + Use with: "with TempDir() as temp_dir" to change to that directory and then automatically + change back to the original working directory afterwards and remove the temporary + directory and its contents (if skip_cleanup is False). + """ + + def __init__(self, path=None, skip_cleanup=False): + self.mydir = tempfile.mkdtemp() if path is None else path + self.cwd = None + self._skip_cleanup = skip_cleanup + + def __enter__(self): + self.cwd = os.getcwd() + os.chdir(self.mydir) + return self.mydir + + def __exit__(self, exc_type, exc_val, exc_tb): + os.chdir(self.cwd) + if not self._skip_cleanup: + shutil.rmtree(self.mydir) + +class ChangeDir: + """ Class to temporarily change to a given directory. Use with "with". 
+ """ + + def __init__(self, mydir): + self.mydir = mydir + self.cwd = None + + def __enter__(self): + self.cwd = os.getcwd() + os.chdir(self.mydir) + + def __exit__(self, exc_type, exc_val, exc_tb): + os.chdir(self.cwd) + + +################################################################################ +## +## Azure DevOps pipelines helper functions +## +################################################################################ + +def set_pipeline_variable(name, value): + """ This method sets pipeline variable. + + Args: + name (string): Name of the variable. + value (string): Value of the variable. + """ + define_variable_format = "##vso[task.setvariable variable={0}]{1}" + print("{0} -> {1}".format(name, value)) # logging + print(define_variable_format.format(name, value)) # set variable + + +################################################################################ +## +## Helper functions +## +################################################################################ + +def run_command(command_to_run, _cwd=None, _exit_on_fail=False, _output_file=None): + """ Runs the command. + + Args: + command_to_run ([string]): Command to run along with arguments. + _cwd (string): Current working directory. + _exit_on_fail (bool): If it should exit on failure. + _output_file (): + Returns: + (string, string, int): Returns a tuple of stdout, stderr, and command return code if _output_file= None + Otherwise stdout, stderr are empty. + """ + print("Running: " + " ".join(command_to_run)) + command_stdout = "" + command_stderr = "" + return_code = 1 + + output_type = subprocess.STDOUT if _output_file else subprocess.PIPE + with subprocess.Popen(command_to_run, stdout=subprocess.PIPE, stderr=output_type, cwd=_cwd) as proc: + + # For long running command, continuously print the output + if _output_file: + while True: + with open(_output_file, 'a') as of: + output = proc.stdout.readline() + if proc.poll() is not None: + break + if output: + output_str = output.strip().decode("utf-8") + print(output_str) + of.write(output_str + "\n") + else: + command_stdout, command_stderr = proc.communicate() + if len(command_stdout) > 0: + print(command_stdout.decode("utf-8")) + if len(command_stderr) > 0: + print(command_stderr.decode("utf-8")) + + return_code = proc.returncode + if _exit_on_fail and return_code != 0: + print("Command failed. Exiting.") + sys.exit(1) + return command_stdout, command_stderr, return_code + + +def copy_directory(src_path, dst_path, verbose_output=False, verbose_copy=False, verbose_skip=False, match_func=lambda path: True): + """Copies directory in 'src_path' to 'dst_path' maintaining the directory + structure. https://docs.python.org/3.5/library/shutil.html#shutil.copytree can't + be used in this case because it expects the destination directory should not + exist, however we do call copy_directory() to copy files to same destination directory. + + Args: + src_path (string): Path of source directory that need to be copied. + dst_path (string): Path where directory should be copied. + verbose_output (bool): True to print every copied or skipped file or error. + verbose_copy (bool): True to print every copied file + verbose_skip (bool): True to print every skipped file. + match_func (str -> bool) : Criteria function determining if a file is copied. 
+ """ + display_copy = verbose_output or verbose_copy + display_skip = verbose_output or verbose_skip + for item in os.listdir(src_path): + src_item = os.path.join(src_path, item) + dst_item = os.path.join(dst_path, item) + if os.path.isdir(src_item): + copy_directory(src_item, dst_item, verbose_output, verbose_copy, verbose_skip, match_func) + else: + try: + if match_func(src_item): + if display_copy: + print("> copy {0} => {1}".format(src_item, dst_item)) + try: + if not os.path.exists(dst_path): + os.makedirs(dst_path) + shutil.copy2(src_item, dst_item) + except PermissionError as pe_error: + print('Ignoring PermissionError: {0}'.format(pe_error)) + else: + if display_skip: + print("> skipping {0}".format(src_item)) + except UnicodeEncodeError: + # Should this always be displayed? Or is it too verbose somehow? + if verbose_output: + print("> Got UnicodeEncodeError") + + +def copy_files(src_path, dst_path, file_names): + """Copy files from 'file_names' list from 'src_path' to 'dst_path'. + It retains the original directory structure of src_path. + + Args: + src_path (string): Source directory from where files are copied. + dst_path (string): Destination directory where files to be copied. + file_names ([string]): List of full path file names to be copied. + """ + + print('### Copying below files from {0} to {1}:'.format(src_path, dst_path)) + print('') + print(os.linesep.join(file_names)) + for f in file_names: + # Create same structure in dst so we don't clobber same files names present in different directories + dst_path_of_file = f.replace(src_path, dst_path) + + dst_directory = os.path.dirname(dst_path_of_file) + if not os.path.exists(dst_directory): + os.makedirs(dst_directory) + try: + shutil.copy2(f, dst_path_of_file) + except PermissionError as pe_error: + print('Ignoring PermissionError: {0}'.format(pe_error)) + + +def remove_prefix(text, prefix): + """ Helper function to remove a prefix `prefix` from a string `text` + """ + if text.startswith(prefix): + return text[len(prefix):] + return text + + +def is_zero_length_file(fpath): + """ Determine if a file system path refers to an existing file that is zero length + + Args: + fpath (str) : file system path to test + + Returns: + bool : true if the path is an existing file that is zero length + """ + return os.path.isfile(fpath) and os.stat(fpath).st_size == 0 + + +def is_nonzero_length_file(fpath): + """ Determine if a file system path refers to an existing file that is non-zero length + + Args: + fpath (str) : file system path to test + + Returns: + bool : true if the path is an existing file that is non-zero length + """ + return os.path.isfile(fpath) and os.stat(fpath).st_size != 0 + + +def make_safe_filename(s): + """ Turn a string into a string usable as a single file name component; replace illegal characters with underscores. + + Args: + s (str) : string to convert to a file name + + Returns: + (str) : The converted string + """ + def safe_char(c): + if c.isalnum(): + return c + else: + return "_" + return "".join(safe_char(c) for c in s) + + +def find_in_path(name, pathlist, match_func=os.path.isfile): + """ Find a name (e.g., directory name or file name) in the file system by searching the directories + in a `pathlist` (e.g., PATH environment variable that has been semi-colon + split into a list). 
+ + Args: + name (str) : name to search for + pathlist (list) : list of directory names to search + match_func (str -> bool) : determines if the name is a match + + Returns: + (str) The pathname of the object, or None if not found. + """ + for dirname in pathlist: + candidate = os.path.join(dirname, name) + if match_func(candidate): + return candidate + return None + + +def find_file(filename, pathlist): + """ Find a filename in the file system by searching the directories + in a `pathlist` (e.g., PATH environment variable that has been semi-colon + split into a list). + + Args: + filename (str) : name to search for + pathlist (list) : list of directory names to search + + Returns: + (str) The pathname of the object, or None if not found. + """ + return find_in_path(filename, pathlist) + + +def find_dir(dirname, pathlist): + """ Find a directory name in the file system by searching the directories + in a `pathlist` (e.g., PATH environment variable that has been semi-colon + split into a list). + + Args: + dirname (str) : name to search for + pathlist (list) : list of directory names to search + + Returns: + (str) The pathname of the object, or None if not found. + """ + return find_in_path(dirname, pathlist, match_func=os.path.isdir) + + +def create_unique_directory_name(root_directory, base_name): + """ Create a unique directory name by joining `root_directory` and `base_name`. + If this name already exists, append ".1", ".2", ".3", etc., to the final + name component until the full directory name is not found. + + Args: + root_directory (str) : root directory in which a new directory will be created + base_name (str) : the base name of the new directory name component to be added + + Returns: + (str) The full absolute path of the new directory. The directory has been created. + """ + root_directory = os.path.abspath(root_directory) + full_path = os.path.join(root_directory, base_name) + + count = 1 + while os.path.isdir(full_path): + new_full_path = os.path.join(root_directory, base_name + "." + str(count)) + count += 1 + full_path = new_full_path + + os.makedirs(full_path) + return full_path + + +def create_unique_file_name(directory, base_name, extension): + """ Create a unique file name in the specified directory by joining `base_name` and `extension`. + If this name already exists, append ".1", ".2", ".3", etc., to the `base_name` + name component until the full file name is not found. + + Args: + directory (str) : directory in which a new file will be created + base_name (str) : the base name of the new filename to be added + extension (str) : the filename extension of the new filename to be added + + Returns: + (str) The full absolute path of the new filename. + """ + directory = os.path.abspath(directory) + if not os.path.isdir(directory): + try: + os.makedirs(directory) + except Exception as exception: + logging.critical(exception) + raise exception + + full_path = os.path.join(directory, base_name + "." + extension) + + count = 1 + while os.path.isfile(full_path): + new_full_path = os.path.join(directory, base_name + "." + str(count) + "." + extension) + count += 1 + full_path = new_full_path + + return full_path + + +def get_files_from_path(path, match_func=lambda path: True): + """ Return all files in a directory tree matching a criteria. + + Args: + path (str) : Either a single file to include, or a directory to traverse looking for matching + files. 
+ match_func (str -> bool) : Criteria function determining if a file is added to the list + + Returns: + Array of absolute paths of matching files + """ + if not(os.path.isdir(path) or os.path.isfile(path)): + logging.warning("Warning: \"%s\" is not a file or directory", path) + return [] + + path = os.path.abspath(path) + + files = [] + + if os.path.isdir(path): + for item in os.listdir(path): + files += get_files_from_path(os.path.join(path, item), match_func) + else: + if match_func(path): + files.append(path) + + return files + + +def is_url(path): + """ Return True if this looks like a URL + + Args: + path (str) : name to check + + Returns: + True it it looks like an URL, False otherwise. + """ + # Probably could use urllib.parse to be more precise. + # If it doesn't look like an URL, treat it like a file, possibly a UNC file. + return path.lower().startswith("http:") or path.lower().startswith("https:") + +################################################################################ +## +## Azure Storage functions +## +################################################################################ + +# Decide if we're going to download and enumerate Azure Storage using anonymous +# read access and urllib functions (False), or Azure APIs including authentication (True). +authenticate_using_azure = False + +# Have we checked whether we have the Azure Storage libraries yet? +azure_storage_libraries_check = False + + +def require_azure_storage_libraries(need_azure_storage_blob=True, need_azure_identity=True): + """ Check for and import the Azure libraries. + We do this lazily, only when we decide we're actually going to need them. + Once we've done it once, we don't do it again. + """ + global azure_storage_libraries_check, BlobServiceClient, BlobClient, ContainerClient, AzureCliCredential + + if azure_storage_libraries_check: + return + + azure_storage_libraries_check = True + + azure_storage_blob_import_ok = True + if need_azure_storage_blob: + try: + from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient + except: + azure_storage_blob_import_ok = False + + azure_identity_import_ok = True + if need_azure_identity: + try: + from azure.identity import AzureCliCredential + except: + azure_identity_import_ok = False + + if not azure_storage_blob_import_ok or not azure_identity_import_ok: + logging.error("One or more required Azure Storage packages is missing.") + logging.error("") + logging.error("Please install:") + logging.error(" pip install azure-storage-blob azure-identity") + logging.error("or (Windows):") + logging.error(" py -3 -m pip install azure-storage-blob azure-identity") + logging.error("See also https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python") + raise RuntimeError("Missing Azure Storage package.") + + # The Azure packages spam all kinds of output to the logging channels. + # Restrict this to only ERROR and CRITICAL. + for name in logging.Logger.manager.loggerDict.keys(): + if 'azure' in name: + logging.getLogger(name).setLevel(logging.ERROR) + + +def report_azure_error(): + """ Report an Azure error + """ + logging.error("A problem occurred accessing Azure. Are you properly authenticated using the Azure CLI?") + logging.error("Install the Azure CLI from https://docs.microsoft.com/en-us/cli/azure/install-azure-cli.") + logging.error("Then log in to Azure using `az login`.") + + +def download_with_azure(uri, target_location, fail_if_not_found=True): + """ Do an URI download using Azure blob storage API. 
Compared to urlretrieve, + there is no progress hook. Maybe if converted to use the async APIs we + could have some kind of progress? + + Args: + uri (string) : URI to download + target_location (string) : local path to put the downloaded object + fail_if_not_found (bool) : if True, fail if a download fails due to file not found (HTTP error 404). + Otherwise, ignore the failure. + + Returns True if successful, False on failure + """ + + require_azure_storage_libraries() + + logging.info("Download: %s -> %s", uri, target_location) + + ok = True + az_credential = AzureCliCredential() + blob = BlobClient.from_blob_url(uri, credential=az_credential) + with open(target_location, "wb") as my_blob: + try: + download_stream = blob.download_blob(retry_total=0) + try: + my_blob.write(download_stream.readall()) + except Exception as ex1: + logging.error("Error writing data to %s", target_location) + report_azure_error() + ok = False + except Exception as ex2: + logging.error("Azure error downloading %s", uri) + report_azure_error() + ok = False + + if not ok and fail_if_not_found: + raise RuntimeError("Azure failed to download") + return ok + +################################################################################ +## +## File downloading functions +## +################################################################################ + + +def download_progress_hook(count, block_size, total_size): + """ A hook for urlretrieve to report download progress + + Args: + count (int) : current block index + block_size (int) : size of a block + total_size (int) : total size of a payload + """ + sys.stdout.write("\rDownloading {0:.1f}/{1:.1f} MB...".format(min(count * block_size, total_size) / 1024 / 1024, total_size / 1024 / 1024)) + sys.stdout.flush() + + +def download_with_progress_urlretrieve(uri, target_location, fail_if_not_found=True, display_progress=True): + """ Do an URI download using urllib.request.urlretrieve with a progress hook. + + Outputs messages using the `logging` package. + + Args: + uri (string) : URI to download + target_location (string) : local path to put the downloaded object + fail_if_not_found (bool) : if True, fail if a download fails due to file not found (HTTP error 404). + Otherwise, ignore the failure. + display_progress (bool) : if True, display download progress (for URL downloads). Otherwise, do not. + + Returns True if successful, False on failure + """ + logging.info("Download: %s -> %s", uri, target_location) + + ok = True + try: + progress_display_method = download_progress_hook if display_progress else None + urllib.request.urlretrieve(uri, target_location, reporthook=progress_display_method) + except urllib.error.HTTPError as httperror: + if (httperror == 404) and fail_if_not_found: + logging.error("HTTP 404 error") + raise httperror + ok = False + + if display_progress: + sys.stdout.write("\n") # Add newline after progress hook + + return ok + + +def download_one_url(uri, target_location, fail_if_not_found=True, is_azure_storage=False, display_progress=True): + """ Do an URI download using urllib.request.urlretrieve or Azure Storage APIs. + + Args: + uri (string) : URI to download + target_location (string) : local path to put the downloaded object + fail_if_not_found (bool) : if True, fail if a download fails due to file not found (HTTP error 404). + Otherwise, ignore the failure. + display_progress (bool) : if True, display download progress (for URL downloads). Otherwise, do not. 
+ + Returns True if successful, False on failure + """ + if is_azure_storage and authenticate_using_azure: + return download_with_azure(uri, target_location, fail_if_not_found) + else: + return download_with_progress_urlretrieve(uri, target_location, fail_if_not_found, display_progress) + + +def download_files(paths, target_dir, verbose=True, fail_if_not_found=True, is_azure_storage=False, display_progress=True): + """ Download a set of files, specified as URLs or paths (such as Windows UNC paths), + to a target directory. If a file is a .ZIP file, then uncompress the file and + copy all its contents to the target directory. + + Args: + paths (list): the URLs and paths to download + target_dir (str): target directory where files are copied. It will be created if it doesn't already exist. + verbose (bool): if True, do verbose logging. + fail_if_not_found (bool): if True, fail if a download fails due to file not found (HTTP error 404). + Otherwise, ignore the failure. + is_azure_storage (bool): if True, treat any URL as an Azure Storage URL + display_progress (bool): if True, display download progress (for URL downloads). Otherwise, do not. + + Returns: + list of full paths of local filenames of downloaded files in the target directory + """ + + if len(paths) == 0: + logging.warning("No files specified to download") + return None + + if verbose: + logging.info("Downloading:") + for item_path in paths: + logging.info(" %s", item_path) + + # Create the target directory now, if it doesn't already exist. + target_dir = os.path.abspath(target_dir) + if not os.path.isdir(target_dir): + os.makedirs(target_dir) + + local_paths = [] + + # In case we'll need a temp directory for ZIP file processing, create it first. + with TempDir() as temp_location: + for item_path in paths: + is_item_url = is_url(item_path) + item_name = item_path.split("/")[-1] if is_item_url else os.path.basename(item_path) + + if item_path.lower().endswith(".zip"): + # Delete everything in the temp_location (from previous iterations of this loop, so previous URL downloads). + temp_location_items = [os.path.join(temp_location, item) for item in os.listdir(temp_location)] + for item in temp_location_items: + if os.path.isdir(item): + shutil.rmtree(item) + else: + os.remove(item) + + download_path = os.path.join(temp_location, item_name) + if is_item_url: + ok = download_one_url(item_path, download_path, fail_if_not_found=fail_if_not_found, is_azure_storage=is_azure_storage, display_progress=display_progress) + if not ok: + continue + else: + if fail_if_not_found or os.path.isfile(item_path): + if verbose: + logging.info("Download: %s -> %s", item_path, download_path) + shutil.copy2(item_path, download_path) + + if verbose: + logging.info("Uncompress %s", download_path) + with zipfile.ZipFile(download_path, "r") as file_handle: + file_handle.extractall(temp_location) + + # Copy everything that was extracted to the target directory. + copy_directory(temp_location, target_dir, verbose_copy=verbose, match_func=lambda path: not path.endswith(".zip")) + + # The caller wants to know where all the files ended up, so compute that. 
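+                # For example (illustrative names), a zip entry extracted to <temp_location>/sub/a.mch
+                # is reported as <target_dir>/sub/a.mch after the copy above.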
+ for dirpath, _, files in os.walk(temp_location, topdown=True): + for file_name in files: + if not file_name.endswith(".zip"): + full_file_path = os.path.join(dirpath, file_name) + target_path = full_file_path.replace(temp_location, target_dir) + local_paths.append(target_path) + else: + # Not a zip file; download directory to target directory + download_path = os.path.join(target_dir, item_name) + if is_item_url: + ok = download_one_url(item_path, download_path, fail_if_not_found=fail_if_not_found, is_azure_storage=is_azure_storage, display_progress=display_progress) + if not ok: + continue + else: + if fail_if_not_found or os.path.isfile(item_path): + if verbose: + logging.info("Download: %s -> %s", item_path, download_path) + shutil.copy2(item_path, download_path) + local_paths.append(download_path) + + return local_paths diff --git a/src/coreclr/scripts/superpmi-asmdiffs.proj b/src/coreclr/scripts/superpmi-asmdiffs.proj new file mode 100644 index 0000000000000..170b306202224 --- /dev/null +++ b/src/coreclr/scripts/superpmi-asmdiffs.proj @@ -0,0 +1,73 @@ + + + + + + + %HELIX_PYTHONPATH% + %HELIX_CORRELATION_PAYLOAD% + %HELIX_WORKITEM_UPLOAD_ROOT% + + $(BUILD_SOURCESDIRECTORY)\artifacts\helixresults + $(Python) $(ProductDirectory)\superpmi_asmdiffs.py -base_jit_directory $(ProductDirectory)\base -diff_jit_directory $(ProductDirectory)\diff -log_directory $(SuperpmiLogsLocation) + 1:00 + + + + false + false + $(_Creator) + $(_HelixAccessToken) + $(_HelixBuild) + $(_HelixSource) + $(_HelixTargetQueues) + $(_HelixType) + + + + + %(Identity) + + + + + + + + + + + + + + + + + + $(WorkItemCommand) -arch %(HelixWorkItem.Architecture) -platform %(HelixWorkItem.Platform) + $(WorkItemTimeout) + superpmi_%(HelixWorkItem.Platform)_%(HelixWorkItem.Architecture).log;superpmi_download_%(HelixWorkItem.Platform)_%(HelixWorkItem.Architecture).log;superpmi_diff_summary_%(HelixWorkItem.Platform)_%(HelixWorkItem.Architecture).md + + + diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 0c098ecc89880..dacf3a889d041 100755 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -36,13 +36,13 @@ import zipfile from coreclr_arguments import * +from jitutil import TempDir, ChangeDir, remove_prefix, is_zero_length_file, is_nonzero_length_file, \ + make_safe_filename, find_file, download_one_url, download_files, report_azure_error, \ + require_azure_storage_libraries, authenticate_using_azure, \ + create_unique_directory_name, create_unique_file_name, get_files_from_path locale.setlocale(locale.LC_ALL, '') # Use '' for auto, or force e.g. to 'en_US.UTF-8' -# Decide if we're going to download and enumerate Azure Storage using anonymous -# read access and urllib functions (False), or Azure APIs including authentication (True). -authenticate_using_azure = False - ################################################################################ # Azure Storage information ################################################################################ @@ -237,8 +237,9 @@ core_root_parser.add_argument("-log_level", help=log_level_help) core_root_parser.add_argument("-log_file", help=log_file_help) core_root_parser.add_argument("-spmi_location", help=spmi_location_help) +core_root_parser.add_argument("--no_progress", action="store_true", help=download_no_progress_help) -# Create a set of arguments common to target specification. Used for replay, upload, upload-private, download, list-collections. +# Create a set of arguments common to target specification. 
Used for collect, replay, asmdiffs, upload, upload-private, download, list-collections. target_parser = argparse.ArgumentParser(add_help=False) @@ -342,7 +343,6 @@ download_parser.add_argument("-jit_ee_version", help=jit_ee_version_help) download_parser.add_argument("--skip_cleanup", action="store_true", help=skip_cleanup_help) download_parser.add_argument("--force_download", action="store_true", help=force_download_help) -download_parser.add_argument("--no_progress", action="store_true", help=download_no_progress_help) download_parser.add_argument("-mch_files", metavar="MCH_FILE", nargs='+', help=replay_mch_files_help) download_parser.add_argument("-private_store", action="append", help=private_store_help) @@ -363,345 +363,6 @@ ################################################################################ # Helper functions ################################################################################ - -def remove_prefix(text, prefix): - """ Helper method to remove a prefix `prefix` from a string `text` - """ - if text.startswith(prefix): - return text[len(prefix):] - return text - -# Have we checked whether we have the Azure Storage libraries yet? -azure_storage_libraries_check = False - - -def require_azure_storage_libraries(need_azure_storage_blob=True, need_azure_identity=True): - """ Check for and import the Azure libraries. - We do this lazily, only when we decide we're actually going to need them. - Once we've done it once, we don't do it again. - """ - global azure_storage_libraries_check, BlobServiceClient, BlobClient, ContainerClient, AzureCliCredential - - if azure_storage_libraries_check: - return - - azure_storage_libraries_check = True - - azure_storage_blob_import_ok = True - if need_azure_storage_blob: - try: - from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient - except: - azure_storage_blob_import_ok = False - - azure_identity_import_ok = True - if need_azure_identity: - try: - from azure.identity import AzureCliCredential - except: - azure_identity_import_ok = False - - if not azure_storage_blob_import_ok or not azure_identity_import_ok: - logging.error("One or more required Azure Storage packages is missing.") - logging.error("") - logging.error("Please install:") - logging.error(" pip install azure-storage-blob azure-identity") - logging.error("or (Windows):") - logging.error(" py -3 -m pip install azure-storage-blob azure-identity") - logging.error("See also https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python") - raise RuntimeError("Missing Azure Storage package.") - - # The Azure packages spam all kinds of output to the logging channels. - # Restrict this to only ERROR and CRITICAL. - for name in logging.Logger.manager.loggerDict.keys(): - if 'azure' in name: - logging.getLogger(name).setLevel(logging.ERROR) - - -def download_progress_hook(count, block_size, total_size): - """ A hook for urlretrieve to report download progress - - Args: - count (int) : current block index - block_size (int) : size of a block - total_size (int) : total size of a payload - """ - sys.stdout.write("\rDownloading {0:.1f}/{1:.1f} MB...".format(min(count * block_size, total_size) / 1024 / 1024, total_size / 1024 / 1024)) - sys.stdout.flush() - - -def download_with_progress_urlretrieve(uri, target_location, fail_if_not_found=True, display_progress=True): - """ Do an URI download using urllib.request.urlretrieve with a progress hook. 
- - Args: - uri (string) : URI to download - target_location (string) : local path to put the downloaded object - fail_if_not_found (bool) : if True, fail if a download fails due to file not found (HTTP error 404). - Otherwise, ignore the failure. - - Returns True if successful, False on failure - """ - logging.info("Download: %s -> %s", uri, target_location) - - ok = True - try: - progress_display_method = download_progress_hook if display_progress else None - urllib.request.urlretrieve(uri, target_location, reporthook=progress_display_method) - except urllib.error.HTTPError as httperror: - if (httperror == 404) and fail_if_not_found: - logging.error("HTTP 404 error") - raise httperror - ok = False - - sys.stdout.write("\n") # Add newline after progress hook - return ok - - -def report_azure_error(): - """ Report an Azure error - """ - logging.error("A problem occurred accessing Azure. Are you properly authenticated using the Azure CLI?") - logging.error("Install the Azure CLI from https://docs.microsoft.com/en-us/cli/azure/install-azure-cli.") - logging.error("Then log in to Azure using `az login`.") - - -def download_with_azure(uri, target_location, fail_if_not_found=True): - """ Do an URI download using Azure blob storage API. Compared to urlretrieve, - there is no progress hook. Maybe if converted to use the async APIs we - could have some kind of progress? - - Args: - uri (string) : URI to download - target_location (string) : local path to put the downloaded object - fail_if_not_found (bool) : if True, fail if a download fails due to file not found (HTTP error 404). - Otherwise, ignore the failure. - - Returns True if successful, False on failure - """ - - require_azure_storage_libraries() - - logging.info("Download: %s -> %s", uri, target_location) - - ok = True - az_credential = AzureCliCredential() - blob = BlobClient.from_blob_url(uri, credential=az_credential) - with open(target_location, "wb") as my_blob: - try: - download_stream = blob.download_blob(retry_total=0) - try: - my_blob.write(download_stream.readall()) - except Exception as ex1: - logging.error("Error writing data to %s", target_location) - report_azure_error() - ok = False - except Exception as ex2: - logging.error("Azure error downloading %s", uri) - report_azure_error() - ok = False - - if not ok and fail_if_not_found: - raise RuntimeError("Azure failed to download") - return ok - - -def download_one_url(uri, target_location, fail_if_not_found=True, display_progress=True): - """ Do an URI download using urllib.request.urlretrieve or Azure Storage APIs. - - Args: - uri (string) : URI to download - target_location (string) : local path to put the downloaded object - fail_if_not_found (bool) : if True, fail if a download fails due to file not found (HTTP error 404). - Otherwise, ignore the failure. 
- - Returns True if successful, False on failure - """ - if authenticate_using_azure: - return download_with_azure(uri, target_location, fail_if_not_found) - else: - return download_with_progress_urlretrieve(uri, target_location, fail_if_not_found, display_progress) - - -def is_zero_length_file(fpath): - """ Determine if a file system path refers to an existing file that is zero length - - Args: - fpath (str) : file system path to test - - Returns: - bool : true if the path is an existing file that is zero length - """ - return os.path.isfile(fpath) and os.stat(fpath).st_size == 0 - - -def is_nonzero_length_file(fpath): - """ Determine if a file system path refers to an existing file that is non-zero length - - Args: - fpath (str) : file system path to test - - Returns: - bool : true if the path is an existing file that is non-zero length - """ - return os.path.isfile(fpath) and os.stat(fpath).st_size != 0 - - -def make_safe_filename(s): - """ Turn a string into a string usable as a single file name component; replace illegal characters with underscores. - - Args: - s (str) : string to convert to a file name - - Returns: - (str) : The converted string - """ - def safe_char(c): - if c.isalnum(): - return c - else: - return "_" - return "".join(safe_char(c) for c in s) - - -def find_in_path(name, pathlist, match_func=os.path.isfile): - """ Find a name (e.g., directory name or file name) in the file system by searching the directories - in a `pathlist` (e.g., PATH environment variable that has been semi-colon - split into a list). - - Args: - name (str) : name to search for - pathlist (list) : list of directory names to search - match_func (str -> bool) : determines if the name is a match - - Returns: - (str) The pathname of the object, or None if not found. - """ - for dirname in pathlist: - candidate = os.path.join(dirname, name) - if match_func(candidate): - return candidate - return None - - -def find_file(filename, pathlist): - """ Find a filename in the file system by searching the directories - in a `pathlist` (e.g., PATH environment variable that has been semi-colon - split into a list). - - Args: - filename (str) : name to search for - pathlist (list) : list of directory names to search - - Returns: - (str) The pathname of the object, or None if not found. - """ - return find_in_path(filename, pathlist) - - -def find_dir(dirname, pathlist): - """ Find a directory name in the file system by searching the directories - in a `pathlist` (e.g., PATH environment variable that has been semi-colon - split into a list). - - Args: - dirname (str) : name to search for - pathlist (list) : list of directory names to search - - Returns: - (str) The pathname of the object, or None if not found. - """ - return find_in_path(dirname, pathlist, match_func=os.path.isdir) - - -def create_unique_directory_name(root_directory, base_name): - """ Create a unique directory name by joining `root_directory` and `base_name`. - If this name already exists, append ".1", ".2", ".3", etc., to the final - name component until the full directory name is not found. - - Args: - root_directory (str) : root directory in which a new directory will be created - base_name (str) : the base name of the new directory name component to be added - - Returns: - (str) The full absolute path of the new directory. The directory has been created. 
- """ - - root_directory = os.path.abspath(root_directory) - full_path = os.path.join(root_directory, base_name) - - count = 1 - while os.path.isdir(full_path): - new_full_path = os.path.join(root_directory, base_name + "." + str(count)) - count += 1 - full_path = new_full_path - - os.makedirs(full_path) - return full_path - - -def create_unique_file_name(directory, base_name, extension): - """ Create a unique file name in the specified directory by joining `base_name` and `extension`. - If this name already exists, append ".1", ".2", ".3", etc., to the `base_name` - name component until the full file name is not found. - - Args: - directory (str) : directory in which a new file will be created - base_name (str) : the base name of the new filename to be added - extension (str) : the filename extension of the new filename to be added - - Returns: - (str) The full absolute path of the new filename. - """ - - directory = os.path.abspath(directory) - if not os.path.isdir(directory): - try: - os.makedirs(directory) - except Exception as exception: - logging.critical(exception) - raise exception - - full_path = os.path.join(directory, base_name + "." + extension) - - count = 1 - while os.path.isfile(full_path): - new_full_path = os.path.join(directory, base_name + "." + str(count) + "." + extension) - count += 1 - full_path = new_full_path - - return full_path - - -def get_files_from_path(path, match_func=lambda path: True): - """ Return all files in a directory tree matching a criteria. - - Args: - path (str) : Either a single file to include, or a directory to traverse looking for matching - files. - match_func (str -> bool) : Criteria function determining if a file is added to the list - - Returns: - Array of absolute paths of matching files - """ - - if not(os.path.isdir(path) or os.path.isfile(path)): - logging.warning("Warning: \"%s\" is not a file or directory", path) - return [] - - path = os.path.abspath(path) - - files = [] - - if os.path.isdir(path): - for item in os.listdir(path): - files += get_files_from_path(os.path.join(path, item), match_func) - else: - if match_func(path): - files.append(path) - - return files - - def run_and_log(command, log_level=logging.DEBUG): """ Return a command and log its output to the debug logger @@ -788,19 +449,6 @@ def create_artifacts_base_name(coreclr_args, mch_file): return artifacts_base_name -def is_url(path): - """ Return True if this looks like a URL - - Args: - path (str) : name to check - - Returns: - True it it looks like an URL, False otherwise. - """ - # Probably could use urllib.parse to be more precise. - # If it doesn't look like an URL, treat it like a file, possibly a UNC file. - return path.lower().startswith("http:") or path.lower().startswith("https:") - def read_csv_metrics(path): """ Read a metrics summary file produced by superpmi, and return the single row containing the information as a dictionary. @@ -822,47 +470,6 @@ def read_csv_metrics(path): # Helper classes ################################################################################ - -class TempDir: - """ Class to create a temporary working directory, or use one that is passed as an argument. - - Use with: "with TempDir() as temp_dir" to change to that directory and then automatically - change back to the original working directory afterwards and remove the temporary - directory and its contents (if skip_cleanup is False). 
- """ - - def __init__(self, path=None, skip_cleanup=False): - self.mydir = tempfile.mkdtemp() if path is None else path - self.cwd = None - self._skip_cleanup = skip_cleanup - - def __enter__(self): - self.cwd = os.getcwd() - os.chdir(self.mydir) - return self.mydir - - def __exit__(self, exc_type, exc_val, exc_tb): - os.chdir(self.cwd) - if not self._skip_cleanup: - shutil.rmtree(self.mydir) - - -class ChangeDir: - """ Class to temporarily change to a given directory. Use with "with". - """ - - def __init__(self, mydir): - self.mydir = mydir - self.cwd = None - - def __enter__(self): - self.cwd = os.getcwd() - os.chdir(self.mydir) - - def __exit__(self, exc_type, exc_val, exc_tb): - os.chdir(self.cwd) - - class AsyncSubprocessHelper: """ Class to help with async multiprocessing tasks. """ @@ -2015,11 +1622,11 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: if current_text_diff is not None: logging.info("Textual differences found in generated asm.") - # Find jit-analyze.bat/sh on PATH, if it exists, then invoke it. + # Find jit-analyze on PATH, if it exists, then invoke it. ran_jit_analyze = False path_var = os.environ.get("PATH") if path_var is not None: - jit_analyze_file = "jit-analyze.bat" if platform.system() == "Windows" else "jit-analyze.sh" + jit_analyze_file = "jit-analyze.exe" if platform.system() == "Windows" else "jit-analyze" jit_analyze_path = find_file(jit_analyze_file, path_var.split(os.pathsep)) if jit_analyze_path is not None: # It appears we have a built jit-analyze on the path, so try to run it. @@ -2167,7 +1774,8 @@ def determine_coredis_tools(coreclr_args): logging.warning("Warning: Core_Root does not exist at \"%s\"; creating it now", coreclr_args.core_root) os.makedirs(coreclr_args.core_root) coredistools_uri = az_blob_storage_superpmi_container_uri + "/libcoredistools/{}-{}/{}".format(coreclr_args.host_os.lower(), coreclr_args.arch.lower(), coredistools_dll_name) - download_one_url(coredistools_uri, coredistools_location) + skip_progress = hasattr(coreclr_args, 'no_progress') and coreclr_args.no_progress + download_one_url(coredistools_uri, coredistools_location, is_azure_storage=True, display_progress=not skip_progress) assert os.path.isfile(coredistools_location) return coredistools_location @@ -2203,7 +1811,8 @@ def determine_pmi_location(coreclr_args): logging.info("Using PMI found at %s", pmi_location) else: pmi_uri = az_blob_storage_superpmi_container_uri + "/pmi/pmi.dll" - download_one_url(pmi_uri, pmi_location) + skip_progress = hasattr(coreclr_args, 'no_progress') and coreclr_args.no_progress + download_one_url(pmi_uri, pmi_location, is_azure_storage=True, display_progress=not skip_progress) assert os.path.isfile(pmi_location) return pmi_location @@ -2629,7 +2238,7 @@ def filter_local_path(path): # Download all the urls at once, and add the local cache filenames to our accumulated list of local file names. skip_progress = hasattr(coreclr_args, 'no_progress') and coreclr_args.no_progress if len(urls) != 0: - local_mch_files += download_files(urls, mch_cache_dir, display_progress=not skip_progress) + local_mch_files += download_files(urls, mch_cache_dir, is_azure_storage=True, display_progress=not skip_progress) # Special case: walk the URLs list and for every ".mch" or ".mch.zip" file, check to see that either the associated ".mct" file is already # in the list, or add it to a new list to attempt to download (but don't fail the download if it doesn't exist). 
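For reference, the replay paths above now route every collection download through `jitutil.download_files`, marking the URLs as Azure Storage blobs and threading the global `--no_progress` option through `display_progress`. A minimal sketch of a caller, assuming a hypothetical collection URL and cache directory:

```python
import logging
from jitutil import download_files

logging.basicConfig(level=logging.INFO)

# Hypothetical inputs, for illustration only: one SuperPMI collection URL and a local cache directory.
urls = ["https://example.blob.core.windows.net/superpmi/collections/example.mch.zip"]
mch_cache_dir = r"C:\spmi\mch"

# is_azure_storage=True marks the URLs as Azure Storage blobs; they are still fetched
# anonymously with urlretrieve unless authenticate_using_azure is enabled in jitutil.
# display_progress=False suppresses the per-block progress output, which is what the
# global --no_progress option does.
local_files = download_files(urls, mch_cache_dir,
                             fail_if_not_found=False,
                             is_azure_storage=True,
                             display_progress=False)
print(local_files)
```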
@@ -2640,7 +2249,7 @@ def filter_local_path(path): if mct_url not in urls: mct_urls.append(mct_url) if len(mct_urls) != 0: - local_mch_files += download_files(mct_urls, mch_cache_dir, fail_if_not_found=False, display_progress=not skip_progress) + local_mch_files += download_files(mct_urls, mch_cache_dir, fail_if_not_found=False, is_azure_storage=True, display_progress=not skip_progress) # Even though we might have downloaded MCT files, only return the set of MCH files. local_mch_files = [file for file in local_mch_files if any(file.lower().endswith(extension) for extension in [".mch"])] @@ -2736,94 +2345,7 @@ def filter_superpmi_collections(path): urls = [blob_url_prefix + path for path in paths] skip_progress = hasattr(coreclr_args, 'no_progress') and coreclr_args.no_progress - return download_files(urls, target_dir, display_progress=not skip_progress) - -def download_files(paths, target_dir, verbose=True, fail_if_not_found=True, display_progress=True): - """ Download a set of files, specified as URLs or paths (such as Windows UNC paths), - to a target directory. If a file is a .ZIP file, then uncompress the file and - copy all its contents to the target directory. - - Args: - paths (list): the URLs and paths to download - target_dir (str): target directory where files are copied. - verbse (bool): if True, do verbose logging. - fail_if_not_found (bool): if True, fail if a download fails due to file not found (HTTP error 404). - Otherwise, ignore the failure. - - Returns: - list of full paths of local filenames of downloaded files in the target directory - """ - - if len(paths) == 0: - logging.warning("No files specified to download") - return None - - if verbose: - logging.info("Downloading:") - for item_path in paths: - logging.info(" %s", item_path) - - # Create the target directory now, if it doesn't already exist. - target_dir = os.path.abspath(target_dir) - if not os.path.isdir(target_dir): - os.makedirs(target_dir) - - local_paths = [] - - # In case we'll need a temp directory for ZIP file processing, create it first. - with TempDir() as temp_location: - for item_path in paths: - is_item_url = is_url(item_path) - item_name = item_path.split("/")[-1] if is_item_url else os.path.basename(item_path) - - if item_path.lower().endswith(".zip"): - # Delete everything in the temp_location (from previous iterations of this loop, so previous URL downloads). - temp_location_items = [os.path.join(temp_location, item) for item in os.listdir(temp_location)] - for item in temp_location_items: - if os.path.isdir(item): - shutil.rmtree(item) - else: - os.remove(item) - - download_path = os.path.join(temp_location, item_name) - if is_item_url: - ok = download_one_url(item_path, download_path, fail_if_not_found, display_progress) - if not ok: - continue - else: - if fail_if_not_found or os.path.isfile(item_path): - if verbose: - logging.info("Download: %s -> %s", item_path, download_path) - shutil.copy2(item_path, download_path) - - if verbose: - logging.info("Uncompress %s", download_path) - with zipfile.ZipFile(download_path, "r") as file_handle: - file_handle.extractall(temp_location) - - # Copy everything that was extracted to the target directory. 
- items = [ os.path.join(temp_location, item) for item in os.listdir(temp_location) if not item.endswith(".zip") ] - for item in items: - target_path = os.path.join(target_dir, os.path.basename(item)) - if verbose: - logging.info("Copy %s -> %s", item, target_path) - shutil.copy2(item, target_dir) - local_paths.append(target_path) - else: - # Not a zip file; download directory to target directory - download_path = os.path.join(target_dir, item_name) - if is_item_url: - ok = download_one_url(item_path, download_path, fail_if_not_found, display_progress) - if not ok: - continue - else: - if fail_if_not_found or os.path.isfile(item_path): - if verbose: - logging.info("Download: %s -> %s", item_path, download_path) - shutil.copy2(item_path, download_path) - local_paths.append(download_path) - - return local_paths + return download_files(urls, target_dir, is_azure_storage=True, display_progress=not skip_progress) def upload_mch(coreclr_args): @@ -3218,7 +2740,7 @@ def process_base_jit_path_arg(coreclr_args): blob_folder_name = "{}/{}/{}/{}/{}/{}".format(az_builds_root_folder, git_hash, coreclr_args.host_os, coreclr_args.arch, coreclr_args.build_type, jit_name) blob_uri = "{}/{}".format(az_blob_storage_jitrollingbuild_container_uri, blob_folder_name) urls = [ blob_uri ] - local_files = download_files(urls, basejit_dir, verbose=False, fail_if_not_found=False) + local_files = download_files(urls, basejit_dir, verbose=False, is_azure_storage=True, fail_if_not_found=False) if len(local_files) > 0: if hashnum > 1: @@ -3298,6 +2820,11 @@ def setup_spmi_location_arg(spmi_location): "Unable to set spmi_location", modify_arg=setup_spmi_location_arg) + coreclr_args.verify(args, + "no_progress", + lambda unused: True, + "Unable to set no_progress") + # Finish setting up logging. # The spmi_location is the root directory where we put the log file. # Log everything to the log file and only the specified verbosity to the console logger. @@ -3845,11 +3372,6 @@ def verify_replay_common_args(): lambda unused: True, "Unable to set force_download") - coreclr_args.verify(args, - "no_progress", - lambda unused: True, - "Unable to set no_progress") - coreclr_args.verify(args, "filter", lambda unused: True, diff --git a/src/coreclr/scripts/superpmi_asmdiffs.py b/src/coreclr/scripts/superpmi_asmdiffs.py new file mode 100644 index 0000000000000..aca3fe959f363 --- /dev/null +++ b/src/coreclr/scripts/superpmi_asmdiffs.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 +# +# Licensed to the .NET Foundation under one or more agreements. +# The .NET Foundation licenses this file to you under the MIT license. +# +# Title : superpmi_asmdiffs.py +# +# Notes: +# +# Script to run "superpmi asmdiffs" for various collections on the Helix machines. 
+# +################################################################################ +################################################################################ + +import argparse +import os +import shutil +from coreclr_arguments import * +from jitutil import run_command + +parser = argparse.ArgumentParser(description="description") + +parser.add_argument("-arch", help="Architecture") +parser.add_argument("-platform", help="OS platform") +parser.add_argument("-base_jit_directory", help="path to the directory containing base clrjit binaries") +parser.add_argument("-diff_jit_directory", help="path to the directory containing diff clrjit binaries") +parser.add_argument("-log_directory", help="path to the directory containing superpmi log files") + +def setup_args(args): + """ Setup the args for SuperPMI to use. + + Args: + args (ArgParse): args parsed by arg parser + + Returns: + args (CoreclrArguments) + + """ + coreclr_args = CoreclrArguments(args, require_built_core_root=False, require_built_product_dir=False, + require_built_test_dir=False, default_build_type="Checked") + + coreclr_args.verify(args, + "arch", + lambda unused: True, + "Unable to set arch") + + coreclr_args.verify(args, + "platform", + lambda unused: True, + "Unable to set platform") + + coreclr_args.verify(args, + "base_jit_directory", + lambda jit_directory: os.path.isdir(jit_directory), + "base_jit_directory doesn't exist") + + coreclr_args.verify(args, + "diff_jit_directory", + lambda jit_directory: os.path.isdir(jit_directory), + "diff_jit_directory doesn't exist") + + coreclr_args.verify(args, + "log_directory", + lambda log_directory: True, + "log_directory doesn't exist") + + return coreclr_args + + +def main(main_args): + """ Run superpmi asmdiffs process on the Helix machines. + + See superpmi_asmdiffs_setup.py for how the directory structure is set up in the + correlation payload. This script lives in the root of that directory tree. + + Args: + main_args ([type]): Arguments to the script + """ + + python_path = sys.executable + script_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + coreclr_args = setup_args(main_args) + + # It doesn't really matter where we put the downloaded SPMI artifacts. + # Here, they are put in /artifacts/spmi. + spmi_location = os.path.join(script_dir, "artifacts", "spmi") + + log_directory = coreclr_args.log_directory + platform_name = coreclr_args.platform + + # Find the built jit-analyze and put its directory on the PATH + jit_analyze_dir = os.path.join(script_dir, "jit-analyze") + if not os.path.isdir(jit_analyze_dir): + print("Error: jit-analyze not found in {} (continuing)".format(jit_analyze_dir)) + else: + # Put the jit-analyze directory on the PATH so superpmi.py can find it. + print("Adding {} to PATH".format(jit_analyze_dir)) + os.environ["PATH"] = jit_analyze_dir + os.pathsep + os.environ["PATH"] + + # Find the portable `git` installation, and put `git.exe` on the PATH, for use by `jit-analyze`. + git_directory = os.path.join(script_dir, "git", "cmd") + git_exe_tool = os.path.join(git_directory, "git.exe") + if not os.path.isfile(git_exe_tool): + print("Error: `git` not found at {} (continuing)".format(git_exe_tool)) + else: + # Put the git/cmd directory on the PATH so jit-analyze can find it. 
+ print("Adding {} to PATH".format(git_directory)) + os.environ["PATH"] = git_directory + os.pathsep + os.environ["PATH"] + + # Figure out which JITs to use + os_name = "win" if platform_name.lower() == "windows" else "unix" + arch_name = coreclr_args.arch + host_arch_name = "x64" if arch_name.endswith("64") else "x86" + os_name = "universal" if arch_name.startswith("arm") else os_name + base_jit_path = os.path.join(coreclr_args.base_jit_directory, 'clrjit_{}_{}_{}.dll'.format(os_name, arch_name, host_arch_name)) + diff_jit_path = os.path.join(coreclr_args.diff_jit_directory, 'clrjit_{}_{}_{}.dll'.format(os_name, arch_name, host_arch_name)) + + # Core_Root is where the superpmi tools (superpmi.exe, mcs.exe) are expected to be found. + # We pass the full path of the JITs to use as arguments. + core_root_dir = script_dir + + print("Running superpmi.py download to get MCH files") + + log_file = os.path.join(log_directory, "superpmi_download_{}_{}.log".format(platform_name, arch_name)) + run_command([ + python_path, + os.path.join(script_dir, "superpmi.py"), + "download", + "--no_progress", + "-core_root", core_root_dir, + "-target_os", platform_name, + "-target_arch", arch_name, + "-spmi_location", spmi_location, + "-log_level", "debug", + "-log_file", log_file + ], _exit_on_fail=True) + + print("Running superpmi.py asmdiffs") + log_file = os.path.join(log_directory, "superpmi_{}_{}.log".format(platform_name, arch_name)) + + overall_md_summary_file = os.path.join(spmi_location, "diff_summary.md") + if os.path.isfile(overall_md_summary_file): + os.remove(overall_md_summary_file) + + _, _, return_code = run_command([ + python_path, + os.path.join(script_dir, "superpmi.py"), + "asmdiffs", + "--no_progress", + "-core_root", core_root_dir, + "-target_os", platform_name, + "-target_arch", arch_name, + "-arch", host_arch_name, + "-base_jit_path", base_jit_path, + "-diff_jit_path", diff_jit_path, + "-spmi_location", spmi_location, + "-error_limit", "100", + "-log_level", "debug", + "-log_file", log_file]) + + # If there are asm diffs, and jit-analyze ran, we'll get a diff_summary.md file in the spmi_location directory. + # We make sure the file doesn't exist before we run diffs, so we don't need to worry about superpmi.py creating + # a unique, numbered file. If there are no diffs, we still want to create this file and indicate there were no diffs. + + overall_md_summary_file_target = os.path.join(log_directory, "superpmi_diff_summary_{}_{}.md".format(platform_name, arch_name)) + if os.path.isfile(overall_md_summary_file): + try: + print("Copying summary file {} -> {}".format(overall_md_summary_file, overall_md_summary_file_target)) + shutil.copy2(overall_md_summary_file, overall_md_summary_file_target) + except PermissionError as pe_error: + print('Ignoring PermissionError: {0}'.format(pe_error)) + else: + # Write a basic summary file. Ideally, we should not generate a summary.md file. However, currently I'm seeing + # errors where the Helix work item fails to upload this specified file if it doesn't exist. We should change the + # upload to be conditional, or otherwise not error. + with open(overall_md_summary_file_target, "a") as f: + f.write("""\ +No diffs found +""") + + # TODO: the superpmi.py asmdiffs command returns a failure code if there are MISSING data even if there are + # no asm diffs. We should probably only fail if there are actual failures (not MISSING or asm diffs). 
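To make the `clrjit_{os}_{arch}_{host_arch}.dll` selection computed earlier in this script concrete, here is a standalone sketch (illustration only, not part of the change) that reproduces the same mapping:

```python
def jit_file_name(target_os, target_arch):
    """Mirror the naming logic in superpmi_asmdiffs.py: target OS, target arch, host arch.
    arm/arm64 targets use the 'universal' cross-targeting JIT build."""
    os_name = "win" if target_os.lower() == "windows" else "unix"
    host_arch = "x64" if target_arch.endswith("64") else "x86"
    if target_arch.startswith("arm"):
        os_name = "universal"
    return "clrjit_{}_{}_{}.dll".format(os_name, target_arch, host_arch)

# Example target pairs, as passed via the -platform / -arch work item arguments:
for target_os, target_arch in [("windows", "x64"), ("windows", "arm64"), ("Linux", "x64"),
                               ("Linux", "arm64"), ("windows", "x86"), ("Linux", "arm")]:
    print(target_os, target_arch, "->", jit_file_name(target_os, target_arch))
```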
+ + if return_code != 0: + print("Failure in {}".format(log_file)) + return 1 + + return 0 + + +if __name__ == "__main__": + args = parser.parse_args() + sys.exit(main(args)) diff --git a/src/coreclr/scripts/superpmi_asmdiffs_setup.py b/src/coreclr/scripts/superpmi_asmdiffs_setup.py new file mode 100644 index 0000000000000..6602708e67335 --- /dev/null +++ b/src/coreclr/scripts/superpmi_asmdiffs_setup.py @@ -0,0 +1,255 @@ +#!/usr/bin/env python3 +# +# Licensed to the .NET Foundation under one or more agreements. +# The .NET Foundation licenses this file to you under the MIT license. +# +# Title : superpmi_asmdiffs_setup.py +# +# Notes: +# +# Script to setup the directory structure required to perform SuperPMI asmdiffs in CI. +# It creates `correlation_payload_directory` with `base` and `diff` directories +# that contain clrjit*.dll. It figures out the baseline commit hash to use for +# a particular GitHub pull request, and downloads the JIT rolling build for that +# commit hash. It downloads the jitutils repo and builds the jit-analyze tool. It +# downloads a version of `git` to be used by jit-analyze. +# +################################################################################ +################################################################################ + +import argparse +import logging +import os + +from coreclr_arguments import * +from jitutil import copy_directory, set_pipeline_variable, run_command, TempDir, download_files + +parser = argparse.ArgumentParser(description="description") + +parser.add_argument("-arch", help="Architecture") +parser.add_argument("-source_directory", help="Path to the root directory of the dotnet/runtime source tree") +parser.add_argument("-product_directory", help="Path to the directory containing built binaries (e.g., /artifacts/bin/coreclr/windows.x64.Checked)") + +is_windows = platform.system() == "Windows" + + +def setup_args(args): + """ Setup the args for SuperPMI to use. + + Args: + args (ArgParse): args parsed by arg parser + + Returns: + args (CoreclrArguments) + + """ + coreclr_args = CoreclrArguments(args, require_built_core_root=False, require_built_product_dir=False, + require_built_test_dir=False, default_build_type="Checked") + + coreclr_args.verify(args, + "arch", + lambda unused: True, + "Unable to set arch") + + coreclr_args.verify(args, + "source_directory", + lambda source_directory: os.path.isdir(source_directory), + "source_directory doesn't exist") + + coreclr_args.verify(args, + "product_directory", + lambda product_directory: os.path.isdir(product_directory), + "product_directory doesn't exist") + + return coreclr_args + + +def match_jit_files(full_path): + """ Match all the JIT files that we want to copy and use. + Note that we currently only match Windows files, and not osx cross-compile files. + We also don't copy the "default" clrjit.dll, since we always use the fully specified + JITs, e.g., clrjit_win_x86_x86.dll. + """ + file_name = os.path.basename(full_path) + + if file_name.startswith("clrjit_") and file_name.endswith(".dll") and file_name.find("osx") == -1: + return True + + return False + + +def match_superpmi_tool_files(full_path): + """ Match all the SuperPMI tool files that we want to copy and use. + Note that we currently only match Windows files. + """ + file_name = os.path.basename(full_path) + + if file_name == "superpmi.exe" or file_name == "mcs.exe": + return True + + return False + + +def main(main_args): + """ Prepare the Helix data for SuperPMI asmdiffs Azure DevOps pipeline. 
+ + The Helix correlation payload directory is created and populated as follows: + + \payload -- the correlation payload directory + -- contains the *.py scripts from \src\coreclr\scripts + -- contains superpmi.exe, mcs.exe from the target-specific build + \payload\base + -- contains the baseline JITs + \payload\diff + -- contains the diff JITs + \payload\jit-analyze + -- contains the self-contained jit-analyze build (from dotnet/jitutils) + \payload\git + -- contains a Portable ("xcopy installable") `git` tool, downloaded from: + https://netcorenativeassets.blob.core.windows.net/resource-packages/external/windows/git/Git-2.32.0-64-bit.zip + This is needed by jit-analyze to do `git diff` on the generated asm. The `\payload\git\cmd` + directory is added to the PATH. + NOTE: this only runs on Windows. + + Then, AzDO pipeline variables are set. + + Args: + main_args ([type]): Arguments to the script + """ + + # Set up logging. + logger = logging.getLogger() + logger.setLevel(logging.INFO) + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setLevel(logging.INFO) + logger.addHandler(stream_handler) + + coreclr_args = setup_args(main_args) + + arch = coreclr_args.arch + source_directory = coreclr_args.source_directory + product_directory = coreclr_args.product_directory + + python_path = sys.executable + + # CorrelationPayload directories + correlation_payload_directory = os.path.join(source_directory, "payload") + superpmi_scripts_directory = os.path.join(source_directory, 'src', 'coreclr', 'scripts') + base_jit_directory = os.path.join(correlation_payload_directory, "base") + diff_jit_directory = os.path.join(correlation_payload_directory, "diff") + jit_analyze_build_directory = os.path.join(correlation_payload_directory, "jit-analyze") + git_directory = os.path.join(correlation_payload_directory, "git") + + ######## Get the portable `git` package + + git_url = "https://netcorenativeassets.blob.core.windows.net/resource-packages/external/windows/git/Git-2.32.0-64-bit.zip" + + print('Downloading {} -> {}'.format(git_url, git_directory)) + + urls = [ git_url ] + # There are too many files to be verbose in the download and copy. + download_files(urls, git_directory, verbose=False, display_progress=False) + git_exe_tool = os.path.join(git_directory, "cmd", "git.exe") + if not os.path.isfile(git_exe_tool): + print('Error: `git` not found at {}'.format(git_exe_tool)) + + ######## Get SuperPMI python scripts + + # Copy *.py to CorrelationPayload + print('Copying {} -> {}'.format(superpmi_scripts_directory, correlation_payload_directory)) + copy_directory(superpmi_scripts_directory, correlation_payload_directory, verbose_copy=True, + match_func=lambda path: any(path.endswith(extension) for extension in [".py"])) + + ######## Get baseline JIT + + # Figure out which baseline JIT to use, and download it. + if not os.path.exists(base_jit_directory): + os.makedirs(base_jit_directory) + + print("Fetching history of `main` branch so we can find the baseline JIT") + run_command(["git", "fetch", "origin", "main"], source_directory, _exit_on_fail=True) + + # Note: we only support downloading Windows versions of the JIT currently. To support downloading + # non-Windows JITs on a Windows machine, pass `-host_os ` to jitrollingbuild.py. 
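The baseline selection itself happens inside `jitrollingbuild.py download`: conceptually it starts from the point where the PR branch diverges from `main` (which is why `origin/main` is fetched above) and then looks for a nearby commit that actually has rolling-build JIT binaries uploaded. A rough, illustrative sketch of that first step only, not the actual implementation:

```python
import subprocess

def baseline_fork_point(source_directory):
    """Illustration only: the commit where the current (PR) branch diverges from
    upstream main. The real script additionally searches for a commit hash that
    has JIT rolling-build binaries available in Azure Storage."""
    return subprocess.check_output(
        ["git", "merge-base", "HEAD", "origin/main"],
        cwd=source_directory, universal_newlines=True).strip()
```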
+ print("Running jitrollingbuild.py download to get baseline") + _, _, return_code = run_command([ + python_path, + os.path.join(superpmi_scripts_directory, "jitrollingbuild.py"), + "download", + "-arch", arch, + "-target_dir", base_jit_directory], + source_directory) + + ######## Get diff JIT + + print('Copying diff binaries {} -> {}'.format(product_directory, diff_jit_directory)) + copy_directory(product_directory, diff_jit_directory, verbose_copy=True, match_func=match_jit_files) + + ######## Get SuperPMI tools + + # Put the SuperPMI tools directly in the root of the correlation payload directory. + print('Copying SuperPMI tools {} -> {}'.format(product_directory, correlation_payload_directory)) + copy_directory(product_directory, correlation_payload_directory, verbose_copy=True, match_func=match_superpmi_tool_files) + + ######## Clone and build jitutils: we only need jit-analyze + + try: + with TempDir() as jitutils_directory: + run_command( + ["git", "clone", "--quiet", "--depth", "1", "https://github.com/dotnet/jitutils", jitutils_directory]) + + # Make sure ".dotnet" directory exists, by running the script at least once + dotnet_script_name = "dotnet.cmd" if is_windows else "dotnet.sh" + dotnet_script_path = os.path.join(source_directory, dotnet_script_name) + run_command([dotnet_script_path, "--info"], jitutils_directory) + + # Build jit-analyze only, and build it as a self-contained app (not framework-dependent). + # What target RID are we building? It depends on where we're going to run this code. + # The RID catalog is here: https://docs.microsoft.com/en-us/dotnet/core/rid-catalog. + # Windows x64 => win-x64 + # Windows x86 => win-x86 + # Windows arm32 => win-arm + # Windows arm64 => win-arm64 + # Linux x64 => linux-x64 + # Linux arm32 => linux-arm + # Linux arm64 => linux-arm64 + # macOS x64 => osx-x64 + + # NOTE: we currently only support running on Windows x86/x64 (we don't pass the target OS) + RID = None + if arch == "x86": + RID = "win-x86" + if arch == "x64": + RID = "win-x64" + + # Set dotnet path to run build + os.environ["PATH"] = os.path.join(source_directory, ".dotnet") + os.pathsep + os.environ["PATH"] + + run_command([ + "dotnet", + "publish", + "-c", "Release", + "--runtime", RID, + "--self-contained", + "--output", jit_analyze_build_directory, + os.path.join(jitutils_directory, "src", "jit-analyze", "jit-analyze.csproj")], + jitutils_directory) + except PermissionError as pe_error: + # Details: https://bugs.python.org/issue26660 + print('Ignoring PermissionError: {0}'.format(pe_error)) + + ######## Set pipeline variables + + helix_source_prefix = "official" + creator = "" + + print('Setting pipeline variables:') + set_pipeline_variable("CorrelationPayloadDirectory", correlation_payload_directory) + set_pipeline_variable("Architecture", arch) + set_pipeline_variable("Creator", creator) + set_pipeline_variable("HelixSourcePrefix", helix_source_prefix) + + +if __name__ == "__main__": + args = parser.parse_args() + sys.exit(main(args)) diff --git a/src/coreclr/scripts/superpmi_asmdiffs_summarize.py b/src/coreclr/scripts/superpmi_asmdiffs_summarize.py new file mode 100644 index 0000000000000..9601d91e56b60 --- /dev/null +++ b/src/coreclr/scripts/superpmi_asmdiffs_summarize.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +# +## Licensed to the .NET Foundation under one or more agreements. +## The .NET Foundation licenses this file to you under the MIT license. 
+# +## +# Title: superpmi_asmdiffs_summarize.py +# +# Notes: +# +# Script to summarize issues found from all partitions and print them on console. +# +################################################################################ +################################################################################ + +import argparse +import os +import re +from coreclr_arguments import * + +parser = argparse.ArgumentParser(description="description") + +parser.add_argument("-diff_summary_dir", help="Path to diff summary directory") +parser.add_argument("-arch", help="Architecture") + +def setup_args(args): + """ Setup the args. + + Args: + args (ArgParse): args parsed by arg parser + + Returns: + args (CoreclrArguments) + + """ + coreclr_args = CoreclrArguments(args, require_built_core_root=False, require_built_product_dir=False, + require_built_test_dir=False, default_build_type="Checked") + + coreclr_args.verify(args, + "diff_summary_dir", + lambda diff_summary_dir: os.path.isdir(diff_summary_dir), + "diff_summary_dir doesn't exist") + + coreclr_args.verify(args, + "arch", + lambda unused: True, + "Unable to set arch") + + return coreclr_args + + +def append_diff_file(f, arch, file_name, full_file_path): + """ Append a single summary file to the consolidated diff file. + + Args: + f : File we are appending to + arch (string): architecture we ran on + file_name (string): base file name of file to append (not including path components) + full_file_path (string): full path to file to append + + Returns: + True if diffs were found in the file, False otherwise + """ + + diffs_found = False + print("Appending {}".format(full_file_path)) + + # What platform is this file summarizing? We parse the filename itself, which is of the form: + # superpmi_diff_summary__.md + + diff_os = "unknown" + diff_arch = "unknown" + match_obj = re.search(r'^superpmi_diff_summary_(.*)_(.*).md', file_name) + if match_obj is not None: + diff_os = match_obj.group(1) + diff_arch = match_obj.group(2) + + with open(full_file_path, "r") as current_superpmi_md: + contents = current_superpmi_md.read() + + # Were there actually any diffs? We currently look to see if the file contains the text "No diffs found", + # inserted by `superpmi_asmdiffs.py`, instead of just not having a diff summary .md file. + # (A missing file has the same effect.) + match_obj = re.search(r'^No diffs found', contents) + if match_obj is not None: + # There were no diffs in this file; don't add it to the result + pass + else: + diffs_found = True + # Write a header for this summary, and create a
<details> ... </details>
disclosure + # section around the file. + f.write("""\ + +## {0} {1} + +<details>
+ +<summary>{0} {1} details</summary> + +Summary file: `{2}` + +To reproduce these diffs on Windows {3}: +``` +superpmi.py asmdiffs -target_os {0} -target_arch {1} -arch {3} +``` + +""".format(diff_os, diff_arch, file_name, arch)) + + # Now write the contents + f.write(contents) + + # Write the footer (close the
<details> section) + f.write("""\ + +</details>
+ +""") + + return diffs_found + + +def main(main_args): + """Main entrypoint + + Args: + main_args ([type]): Arguments to the script + """ + + coreclr_args = setup_args(main_args) + + diff_summary_dir = coreclr_args.diff_summary_dir + arch = coreclr_args.arch + + # Consolidate all superpmi_diff_summary_*.md in overall_diff_summary__.md + # (Don't name it "superpmi_xxx.md" or we might consolidate it into itself.) + # If there are no summary files found, add a "No diffs found" text to be explicit about that. + # + # Note that we currently do this summarizing in an architecture-specific job. That means that diffs run + # in a Windows x64 job and those run in a Windows x86 job will be summarized in two separate files. + # We should create a job that depends on all the diff jobs, downloads all the .md file artifacts, + # and consolidates everything together in one file. + + any_diffs_found = False + + final_md_path = os.path.join(diff_summary_dir, "overall_diff_summary_windows_{}.md".format(arch)) + print("Consolidating final {}".format(final_md_path)) + with open(final_md_path, "a") as f: + + f.write("""\ +# ASM diffs generated on Windows {} +""".format(arch)) + + for dirpath, _, files in os.walk(diff_summary_dir): + for file_name in files: + if file_name.startswith("superpmi_") and file_name.endswith(".md"): + full_file_path = os.path.join(dirpath, file_name) + if append_diff_file(f, arch, file_name, full_file_path): + any_diffs_found = True + + if not any_diffs_found: + f.write("""\ + +No diffs found +""") + + print("##vso[task.uploadsummary]{}".format(final_md_path)) + + with open(final_md_path, "r") as f: + print(f.read()) + + return 0 + + +if __name__ == "__main__": + args = parser.parse_args() + sys.exit(main(args)) diff --git a/src/coreclr/scripts/superpmi_aspnet.py b/src/coreclr/scripts/superpmi_aspnet.py index 2a0225d1e43bc..f2e01ad6e2ae5 100644 --- a/src/coreclr/scripts/superpmi_aspnet.py +++ b/src/coreclr/scripts/superpmi_aspnet.py @@ -20,7 +20,7 @@ from os import path from coreclr_arguments import * from superpmi import TempDir, determine_mcs_tool_path, determine_superpmi_tool_path, is_nonzero_length_file -from azdo_pipelines_util import run_command +from jitutil import run_command # Start of parser object creation. is_windows = platform.system() == "Windows" diff --git a/src/coreclr/scripts/superpmi_benchmarks.py b/src/coreclr/scripts/superpmi_benchmarks.py index 28ebf50c92cd8..1c15b9ae803bf 100644 --- a/src/coreclr/scripts/superpmi_benchmarks.py +++ b/src/coreclr/scripts/superpmi_benchmarks.py @@ -20,7 +20,7 @@ from os.path import isfile from shutil import copyfile from coreclr_arguments import * -from azdo_pipelines_util import run_command, ChangeDir, TempDir +from jitutil import run_command, ChangeDir, TempDir # Start of parser object creation. is_windows = platform.system() == "Windows" diff --git a/src/coreclr/scripts/superpmi_collect_setup.py b/src/coreclr/scripts/superpmi_collect_setup.py index a6a9b84e7e8ba..ce102ea091026 100644 --- a/src/coreclr/scripts/superpmi_collect_setup.py +++ b/src/coreclr/scripts/superpmi_collect_setup.py @@ -37,7 +37,7 @@ import stat from coreclr_arguments import * -from azdo_pipelines_util import run_command, copy_directory, copy_files, set_pipeline_variable, ChangeDir, TempDir +from jitutil import run_command, copy_directory, copy_files, set_pipeline_variable, ChangeDir, TempDir # Start of parser object creation. 
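All of these scripts now import their shared helpers from `jitutil` instead of the removed `azdo_pipelines_util`. For reference, the two context managers they rely on keep the semantics of the definitions moved out of superpmi.py above; a small usage sketch:

```python
import os
from jitutil import TempDir, ChangeDir

# TempDir creates a temporary directory (or uses the path it is given), makes it the
# current directory for the duration of the block, and removes it afterwards unless
# skip_cleanup=True.
with TempDir() as scratch_dir:
    print("scratch directory:", scratch_dir)
    print("current directory:", os.getcwd())

# ChangeDir only switches the current directory and restores it on exit.
with ChangeDir(os.path.expanduser("~")):
    print("now in:", os.getcwd())
```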
@@ -398,7 +398,7 @@ def main(main_args): # create superpmi directory print('Copying {} -> {}'.format(superpmi_src_directory, superpmi_dst_directory)) - copy_directory(superpmi_src_directory, superpmi_dst_directory, match_func=lambda path: any(path.endswith(extension) for extension in [".py"])) + copy_directory(superpmi_src_directory, superpmi_dst_directory, verbose_output=True, match_func=lambda path: any(path.endswith(extension) for extension in [".py"])) if is_windows: acceptable_copy = lambda path: any(path.endswith(extension) for extension in [".py", ".dll", ".exe", ".json"]) @@ -407,7 +407,7 @@ def main(main_args): acceptable_copy = lambda path: (os.path.basename(path).find(".") == -1) or any(path.endswith(extension) for extension in [".py", ".dll", ".so", ".json"]) print('Copying {} -> {}'.format(coreclr_args.core_root_directory, superpmi_dst_directory)) - copy_directory(coreclr_args.core_root_directory, superpmi_dst_directory, match_func=acceptable_copy) + copy_directory(coreclr_args.core_root_directory, superpmi_dst_directory, verbose_output=True, match_func=acceptable_copy) # Copy all the test files to CORE_ROOT # The reason is there are lot of dependencies with *.Tests.dll and to ensure we do not get @@ -448,7 +448,7 @@ def make_readable(folder_name): run_command(["ls", "-l", folder_name]) make_readable(coreclr_args.input_directory) - copy_directory(coreclr_args.input_directory, superpmi_dst_directory, match_func=acceptable_copy) + copy_directory(coreclr_args.input_directory, superpmi_dst_directory, verbose_output=True, match_func=acceptable_copy) # Workitem directories workitem_directory = os.path.join(source_directory, "workitem") diff --git a/src/coreclr/scripts/superpmi_replay.py b/src/coreclr/scripts/superpmi_replay.py index a00b80753b2a9..198b0f28ff940 100644 --- a/src/coreclr/scripts/superpmi_replay.py +++ b/src/coreclr/scripts/superpmi_replay.py @@ -15,7 +15,7 @@ import argparse import os from coreclr_arguments import * -from azdo_pipelines_util import run_command +from jitutil import run_command parser = argparse.ArgumentParser(description="description") diff --git a/src/coreclr/scripts/superpmi_replay_setup.py b/src/coreclr/scripts/superpmi_replay_setup.py index 73c89eb333556..b7717da8efaf2 100644 --- a/src/coreclr/scripts/superpmi_replay_setup.py +++ b/src/coreclr/scripts/superpmi_replay_setup.py @@ -17,7 +17,7 @@ import os from coreclr_arguments import * -from azdo_pipelines_util import copy_directory, copy_files, set_pipeline_variable +from jitutil import copy_directory, copy_files, set_pipeline_variable parser = argparse.ArgumentParser(description="description") @@ -90,12 +90,12 @@ def main(main_args): # Copy *.py to CorrelationPayload print('Copying {} -> {}'.format(superpmi_src_directory, correlation_payload_directory)) - copy_directory(superpmi_src_directory, correlation_payload_directory, + copy_directory(superpmi_src_directory, correlation_payload_directory, verbose_output=True, match_func=lambda path: any(path.endswith(extension) for extension in [".py"])) # Copy clrjit*_arch.dll binaries to CorrelationPayload print('Copying binaries {} -> {}'.format(product_directory, correlation_payload_directory)) - copy_directory(product_directory, correlation_payload_directory, match_func=match_correlation_files) + copy_directory(product_directory, correlation_payload_directory, verbose_output=True, match_func=match_correlation_files) # Set variables print('Setting pipeline variables:')
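These setup scripts finish by calling `set_pipeline_variable`, which emits an Azure DevOps logging command on stdout so that later steps in the job can consume the value. A minimal sketch, assuming the helper in `jitutil.py` keeps the shape it had in `azdo_pipelines_util.py`:

```python
def set_pipeline_variable(name, value):
    """Expose `value` to subsequent Azure DevOps pipeline steps as variable `name`."""
    define_variable_format = "##vso[task.setvariable variable={0}]{1}"
    print("{0} -> {1}".format(name, value))            # plain logging
    print(define_variable_format.format(name, value))  # the AzDO logging command

# Example values, similar to those set by superpmi_asmdiffs_setup.py:
set_pipeline_variable("CorrelationPayloadDirectory", r"C:\runtime\payload")
set_pipeline_variable("Architecture", "x64")
```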