2024-10-11 nightly release (5696b35)
pytorchbot committed Oct 11, 2024
1 parent a583d9d commit 5db774d
Showing 206 changed files with 4,432 additions and 3,697 deletions.
11 changes: 11 additions & 0 deletions .ci/scripts/test_model.sh
@@ -197,6 +197,11 @@ test_model_with_coreml() {
fi
}

test_model_with_mps() {
"${PYTHON_EXECUTABLE}" -m examples.apple.mps.scripts.mps_example --model_name="${MODEL_NAME}" --use_fp16
EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit)
}

if [[ "${BACKEND}" == "portable" ]]; then
echo "Testing ${MODEL_NAME} with portable kernels..."
test_model
@@ -212,6 +217,12 @@ elif [[ "${BACKEND}" == "coreml" ]]; then
if [[ $? -eq 0 ]]; then
prepare_artifacts_upload
fi
elif [[ "${BACKEND}" == "mps" ]]; then
echo "Testing ${MODEL_NAME} with mps..."
test_model_with_mps
if [[ $? -eq 0 ]]; then
prepare_artifacts_upload
fi
elif [[ "${BACKEND}" == "xnnpack" ]]; then
echo "Testing ${MODEL_NAME} with xnnpack..."
WITH_QUANTIZATION=true
167 changes: 151 additions & 16 deletions .github/scripts/extract_benchmark_results.py
@@ -14,7 +14,7 @@
from argparse import Action, ArgumentParser, Namespace
from io import BytesIO
from logging import info, warning
from typing import Any, List, Optional
from typing import Any, Dict, List, Optional
from urllib import error, request


@@ -24,6 +24,15 @@
BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+).json")

# iOS-related regexes and variables
IOS_TEST_SPEC_REGEX = re.compile(
r"Test Case\s+'-\[(?P<test_class>\w+)\s+(?P<test_name>\w+)\]'\s+measured\s+\[(?P<metric>.+)\]\s+average:\s+(?P<value>[\d\.]+),"
)
IOS_TEST_NAME_REGEX = re.compile(
r"test_(?P<method>forward|load|generate)_(?P<model_name>\w+)_pte.*iOS_(?P<ios_ver>\w+)_iPhone(?P<iphone_ver>\w+)"
)
IOS_MODEL_NAME_REGEX = re.compile(r"(?P<model>[^_]+)_(?P<backend>\w+)_(?P<dtype>\w+)")
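
For a sense of what these patterns are meant to match, here is a small standalone sketch. The xcresult log line below is a hypothetical sample shaped to fit IOS_TEST_SPEC_REGEX (the real format comes from the device farm test output), while the test name is borrowed from the docstring examples further down:

import re

IOS_TEST_SPEC_REGEX = re.compile(
    r"Test Case\s+'-\[(?P<test_class>\w+)\s+(?P<test_name>\w+)\]'\s+measured\s+\[(?P<metric>.+)\]\s+average:\s+(?P<value>[\d\.]+),"
)

# Hypothetical xcresult line, constructed to fit the pattern above
line = (
    "Test Case '-[Benchmark test_load_resnet50_xnnpack_q8_pte_iOS_17_2_1_iPhone15_4]'"
    " measured [Clock Monotonic Time, s] average: 0.012,"
)
m = IOS_TEST_SPEC_REGEX.search(line)
assert m is not None
print(m.group("test_name"))  # test_load_resnet50_xnnpack_q8_pte_iOS_17_2_1_iPhone15_4
print(m.group("metric"))     # Clock Monotonic Time, s
print(m.group("value"))      # 0.012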


class ValidateArtifacts(Action):
def __call__(
@@ -135,6 +144,130 @@ def extract_android_benchmark_results(
return []


def initialize_ios_metadata(test_name: str) -> Dict[str, Any]:
"""
Extract the benchmark metadata from the test name, for example:
test_forward_llama2_pte_iOS_17_2_1_iPhone15_4
test_load_resnet50_xnnpack_q8_pte_iOS_17_2_1_iPhone15_4
"""
m = IOS_TEST_NAME_REGEX.match(test_name)
if not m:
return {}

method = m.group("method")
model_name = m.group("model_name")
ios_ver = m.group("ios_ver").replace("_", ".")
iphone_ver = m.group("iphone_ver").replace("_", ".")

# NB: This looks brittle, but unless the test can return iOS benchmark results
# in JSON format, this mapping is needed to match the Android metric names
if method == "load":
metric = "model_load_time(ms)"
elif method == "forward":
metric = (
"generate_time(ms)"
if "llama" in model_name
else "avg_inference_latency(ms)"
)
elif method == "generate":
metric = "token_per_sec"

backend = ""
quantization = "unknown"

m = IOS_MODEL_NAME_REGEX.match(model_name)
if m:
backend = m.group("backend")
quantization = m.group("dtype")
model_name = m.group("model")

return {
"benchmarkModel": {
"backend": backend,
"quantization": quantization,
"name": model_name,
},
"deviceInfo": {
"arch": f"iPhone {iphone_ver}",
"device": f"iPhone {iphone_ver}",
"os": f"iOS {ios_ver}",
"availMem": 0,
"totalMem": 0,
},
"metric": metric,
# These fields will be populated later by extract_ios_metric
"actualValue": 0,
"targetValue": 0,
}
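
Tracing the function above with the second docstring example, test_load_resnet50_xnnpack_q8_pte_iOS_17_2_1_iPhone15_4, should yield roughly the following record (a hand-derived sketch, not output captured from the script):

ios_metadata = {
    "benchmarkModel": {
        "backend": "xnnpack",   # split out of "resnet50_xnnpack_q8" by IOS_MODEL_NAME_REGEX
        "quantization": "q8",
        "name": "resnet50",
    },
    "deviceInfo": {
        "arch": "iPhone 15.4",
        "device": "iPhone 15.4",
        "os": "iOS 17.2.1",     # underscores in the version replaced by dots
        "availMem": 0,
        "totalMem": 0,
    },
    "metric": "model_load_time(ms)",  # the "load" method maps to model load time
    "actualValue": 0,                 # populated later by extract_ios_metric
    "targetValue": 0,
}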


def extract_ios_metric(
benchmark_result: Dict[str, Any],
test_name: str,
metric_name: str,
metric_value: float,
) -> Dict[str, Any]:
"""
Map the metric name from iOS xcresult to the benchmark result
"""
if metric_name == "Clock Monotonic Time, s":
# xcresult reports the time in seconds; the benchmark value is expected in ms
benchmark_result["actualValue"] = metric_value * 1000
elif metric_name == "Tokens Per Second, t/s":
benchmark_result["actualValue"] = metric_value

return benchmark_result
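
Continuing the example, a load test measured at 0.012 s would end up recorded in milliseconds; a minimal sketch, assuming the extract_ios_metric function above is in scope:

record = {"metric": "model_load_time(ms)", "actualValue": 0, "targetValue": 0}
record = extract_ios_metric(
    record,
    "test_load_resnet50_xnnpack_q8_pte_iOS_17_2_1_iPhone15_4",
    "Clock Monotonic Time, s",
    0.012,
)
print(record["actualValue"])  # 12.0, seconds converted to ms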


def extract_ios_benchmark_results(
job_name: str, artifact_type: str, artifact_s3_url: str
) -> List:
"""
The iOS benchmark results currently come from xcresult, which can either be
parsed from CUSTOMER_ARTIFACT or taken from the test spec output. The latter
is probably easier to process
"""
if artifact_type != "TESTSPEC_OUTPUT":
return []

try:
benchmark_results = []

with request.urlopen(artifact_s3_url) as data:
current_test_name = ""
current_record = {}

for line in data.read().decode("utf8").splitlines():
s = IOS_TEST_SPEC_REGEX.search(line)
if not s:
continue

test_class = s.group("test_class")
test_name = s.group("test_name")
metric_name = s.group("metric")
metric_value = float(s.group("value"))

if test_name != current_test_name:
if current_record:
# Save the benchmark result in the same format used by Android
benchmark_results.append(current_record.copy())

current_test_name = test_name
current_record = initialize_ios_metadata(current_test_name)

current_record = extract_ios_metric(
current_record, test_name, metric_name, metric_value
)

# Flush the record for the last test, if any
if current_record:
benchmark_results.append(current_record.copy())

return benchmark_results

except error.HTTPError:
warning(f"Fail to {artifact_type} {artifact_s3_url}")
return []
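
The parsing loop above relies on metric lines for the same test appearing consecutively in the log: a record is flushed whenever the test name changes, and once more after the loop. A stripped-down sketch of just that grouping logic, fed hypothetical (test_name, metric, value) tuples in place of a real log:

from typing import Any, Dict, List, Tuple

def group_by_test(parsed: List[Tuple[str, str, float]]) -> List[Dict[str, Any]]:
    results: List[Dict[str, Any]] = []
    current_name = ""
    record: Dict[str, Any] = {}
    for test_name, metric_name, value in parsed:
        if test_name != current_name:
            if record:
                results.append(record.copy())  # flush the previous test's record
            current_name = test_name
            record = {"test": test_name}
        record[metric_name] = value  # merge each measured metric into the record
    if record:
        results.append(record.copy())  # flush the trailing record
    return results

print(group_by_test([
    ("test_a", "Clock Monotonic Time, s", 0.01),
    ("test_a", "Memory Peak Physical, kB", 1024.0),  # hypothetical second metric
    ("test_b", "Clock Monotonic Time, s", 0.02),
]))
# [{'test': 'test_a', 'Clock Monotonic Time, s': 0.01, 'Memory Peak Physical, kB': 1024.0},
#  {'test': 'test_b', 'Clock Monotonic Time, s': 0.02}]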


def extract_job_id(artifacts_filename: str) -> int:
"""
Extract the job id from the artifacts filename
@@ -222,23 +355,25 @@ def main() -> None:
benchmark_results = extract_android_benchmark_results(
job_name, artifact_type, artifact_s3_url
)
if benchmark_results:
benchmark_results = transform(
app_type,
benchmark_results,
args.repo,
args.head_branch,
args.workflow_name,
args.workflow_run_id,
args.workflow_run_attempt,
job_name,
extract_job_id(args.artifacts),
)
all_benchmark_results.extend(benchmark_results)

if app_type == "IOS_APP":
# TODO (huydhn): Implement the logic for iOS next
pass
benchmark_results = extract_ios_benchmark_results(
job_name, artifact_type, artifact_s3_url
)

if benchmark_results:
benchmark_results = transform(
app_type,
benchmark_results,
args.repo,
args.head_branch,
args.workflow_name,
args.workflow_run_id,
args.workflow_run_attempt,
job_name,
extract_job_id(args.artifacts),
)
all_benchmark_results.extend(benchmark_results)

if all_benchmark_results:
output_file = os.path.basename(args.artifacts)
2 changes: 1 addition & 1 deletion .github/workflows/android-perf.yml
@@ -135,7 +135,7 @@ jobs:
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
fail-fast: false
with:
runner: linux.2xlarge
runner: linux.4xlarge
docker-image: executorch-ubuntu-22.04-clang12-android
submodules: 'true'
timeout: 60
79 changes: 78 additions & 1 deletion .github/workflows/apple-perf.yml
@@ -76,7 +76,7 @@ jobs:
# on-demand and periodic benchmarking.
CRON_DEFAULT_MODELS: "stories110M,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l"
CRON_DEFAULT_DEVICES: "apple_iphone_15"
CRON_DEFAULT_DELEGATES: "xnnpack,coreml"
CRON_DEFAULT_DELEGATES: "xnnpack,coreml,mps"
run: |
set -ex
MODELS="${{ inputs.models }}"
@@ -169,6 +169,8 @@ jobs:
DELEGATE_CONFIG="xnnpack+custom+qe"
elif [[ ${{ matrix.delegate }} == "coreml" ]]; then
DELEGATE_CONFIG="coreml"
elif [[ ${{ matrix.delegate }} == "mps" ]]; then
DELEGATE_CONFIG="mps"
fi
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
@@ -306,3 +308,78 @@ jobs:
ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip
test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

upload-benchmark-results:
needs:
- benchmark-on-device
if: always()
runs-on: linux.2xlarge
environment: upload-benchmark-results
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v3
with:
submodules: false

- name: Authenticate with AWS
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
# The max duration enforced by the server side
role-duration-seconds: 18000
aws-region: us-east-1

- name: Setup conda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
python-version: '3.10'

- name: Download the list of artifacts from S3
env:
ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
shell: bash
run: |
set -eux
${CONDA_RUN} python -mpip install awscli==1.32.18
mkdir -p artifacts
pushd artifacts
${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
popd
ls -lah artifacts
- name: Extract the benchmark results JSON
shell: bash
run: |
set -eux
mkdir -p benchmark-results
for ARTIFACTS_BY_JOB in artifacts/*.json; do
[ -f "${ARTIFACTS_BY_JOB}" ] || break
echo "${ARTIFACTS_BY_JOB}"
${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
--artifacts "${ARTIFACTS_BY_JOB}" \
--output-dir benchmark-results \
--repo ${{ github.repository }} \
--head-branch ${{ github.head_ref || github.ref_name }} \
--workflow-name "${{ github.workflow }}" \
--workflow-run-id ${{ github.run_id }} \
--workflow-run-attempt ${{ github.run_attempt }}
done
ls -lah benchmark-results
for BENCHMARK_RESULTS in benchmark-results/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done
- name: Upload the benchmark results
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
benchmark-results-dir: 'benchmark-results'
dry-run: false
13 changes: 9 additions & 4 deletions .github/workflows/trunk.yml
@@ -311,8 +311,8 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
test-coreml-model:
name: test-coreml-model
test-apple-model:
name: test-apple-model
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
fail-fast: false
@@ -324,20 +324,25 @@
timeout: 90
script: |
BUILD_TOOL=cmake
BACKEND=coreml
bash .ci/scripts/setup-conda.sh
# Setup MacOS dependencies as there is no Docker support on MacOS atm
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
echo "Finishing installing coreml."
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
echo "Finishing installing mps."
# Build and test coreml and mps models
MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l)
for MODEL_NAME in "${MODELS[@]}"; do
echo "::group::Exporting coreml model: $MODEL_NAME"
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml"
echo "::endgroup::"
echo "::group::Exporting mps model: $MODEL_NAME"
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps"
echo "::endgroup::"
done
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -201,6 +201,10 @@ option(EXECUTORCH_BUILD_KERNELS_QUANTIZED "Build the quantized kernels" OFF)

option(EXECUTORCH_BUILD_DEVTOOLS "Build the ExecuTorch Developer Tools")

option(EXECUTORCH_NNLIB_OPT "Build Cadence backend Hifi nnlib kernel" OFF)

option(EXECUTORCH_CADENCE_CPU_RUNNER "Build Cadence backend CPU runner" OFF)

option(EXECUTORCH_BUILD_SIZE_TEST "Build the size test" OFF)

option(EXECUTORCH_BUILD_XNNPACK "Build the XNNPACK backend" OFF)
4 changes: 2 additions & 2 deletions backends/arm/TARGETS
@@ -8,7 +8,7 @@ python_library(
typing = True,
deps = [
":arm_backend",
"//executorch/backends/arm/passes:passes",
"//executorch/backends/arm/_passes:passes",
"//executorch/exir:lib",
],
)
@@ -27,7 +27,7 @@ python_library(
":arm_vela",
"//executorch/backends/arm/operators:lib",
"//executorch/backends/arm/operators:node_visitor",
"//executorch/backends/arm/passes:passes",
"//executorch/backends/arm/_passes:passes",
],
)

File renamed without changes.