Merge branch 'main' into mbahnas/vit_tech_report

tenstorrent · Sep 19, 2024 · 3a6811d · 3a6811d
2 parents b5bc95a + ea8522c
commit 3a6811d
Show file tree

Hide file tree

Showing 591 changed files with 7,979 additions and 2,509 deletions.
diff --git a/.github/workflows/fast-dispatch-full-regressions-and-models.yaml b/.github/workflows/fast-dispatch-full-regressions-and-models.yaml
@@ -26,20 +26,6 @@ jobs:
               cmd: tests/scripts/single_card/nightly/run_common_models.sh,
               timeout: 40
             },
-            {
-              name: "Common models N300 WH B0",
-              arch: wormhole_b0,
-              runs-on: ["cloud-virtual-machine", "N300", "in-service"],
-              cmd: tests/scripts/single_card/nightly/run_common_models.sh,
-              timeout: 40,
-            },
-            {
-              name: "Common models N150 WH BO",
-              arch: wormhole_b0,
-              runs-on: ["cloud-virtual-machine", "N150", "in-service"],
-              cmd: tests/scripts/single_card/nightly/run_common_models.sh,
-              timeout: 40,
-            },
             {
               name: "GS ttnn nightly",
               arch: grayskull,
@@ -68,40 +54,26 @@ jobs:
               cmd: tests/scripts/single_card/nightly/run_gs_only.sh,
               timeout: 40
             },
-            {
-              name: "N300 WH-only models",
-              arch: wormhole_b0,
-              runs-on: ["cloud-virtual-machine", "N300", "in-service"],
-              cmd: tests/scripts/single_card/nightly/run_wh_b0_only.sh,
-              timeout: 100
-            },
-            {
-              name: "N150 WH-only models",
-              arch: wormhole_b0,
-              runs-on: ["cloud-virtual-machine", "N150", "in-service"],
-              cmd: tests/scripts/single_card/nightly/run_wh_b0_only.sh,
-              timeout: 100
-            },
             {
               name: "API tests GS",
               arch: grayskull,
               runs-on: ["cloud-virtual-machine", "E150", "in-service"],
               cmd: ./tests/scripts/run_tests.sh --tt-arch grayskull --pipeline-type frequent_api --dispatch-mode fast,
-              timeout: 40
+              timeout: 10
             },
             {
               name: "API tests N300 WH B0",
               arch: wormhole_b0,
               runs-on: ["cloud-virtual-machine", "N300", "in-service"],
               cmd: ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_api --dispatch-mode fast,
-              timeout: 40
+              timeout: 10
             },
             {
               name: "API tests N150 WH B0",
               arch: wormhole_b0,
               runs-on: ["cloud-virtual-machine", "N150", "in-service"],
               cmd: ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_api --dispatch-mode fast,
-              timeout: 40
+              timeout: 10
             },
             {
               name: "[Unstable] N150 models",
@@ -120,7 +92,6 @@ jobs:
           ]
     name: FD ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }}
     env:
-      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
       ARCH_NAME: ${{ matrix.test-group.arch }}
       LOGURU_LEVEL: INFO
       LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
@@ -155,3 +126,52 @@ jobs:
           path: |
             generated/test_reports/
           prefix: "test_reports_"
+  nightly-wh-models:  # Nightly single-card Wormhole model tests, one job per (card, model) pair
+    needs: build-artifact
+    strategy:
+      # Keep fail-fast off so that every matrix job runs to completion; the aim
+      # is to avoid leaving machines hanging
+      fail-fast: false
+      matrix:  # 2 cards x 7 models = 14 jobs
+        card: [N150, N300]
+        model: [common_models, functional_unet, llama31_8b, mamba, mistral7b, mistral7b_eth, resnet50]
+    name: Nightly ${{ matrix.card }} ${{ matrix.model }}
+    env:
+      ARCH_NAME: wormhole_b0
+      LOGURU_LEVEL: INFO
+      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
+    runs-on: ["cloud-virtual-machine", "in-service", "${{ matrix.card }}"]  # card name is used as a runner label
+    steps:
+      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
+      - uses: ./.github/actions/retry-command  # repo-local action; presumably re-runs `command` per the settings below - confirm
+        with:
+          timeout-seconds: 100
+          max-retries: 10
+          backoff-seconds: 60
+          command: ./.github/scripts/cloud_utils/mount_weka.sh
+      - name: Set up dyanmic env vars for build
+        run: |
+          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
+      - name: Set up WH_ARCH_YAML for eth-enabled models
+        if: ${{ matrix.model != 'mistral7b' }}  # NOTE(review): true for every model except mistral7b, not only mistral7b_eth - confirm intended
+        run: |
+          echo "WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml" >> $GITHUB_ENV
+      - uses: actions/download-artifact@v4
+        with:
+          name: TTMetal_build_wormhole_b0  # presumably uploaded by the build-artifact job - confirm
+      - name: Extract files
+        run: tar -xvf ttm_wormhole_b0.tar
+      - uses: ./.github/actions/install-python-deps
+      - name: Run frequent reg tests scripts
+        timeout-minutes: 30  # step-level limit on the pytest run below
+        run: |
+          source ${{ github.workspace }}/python_env/bin/activate
+          cd $TT_METAL_HOME
+          export PYTHONPATH=$TT_METAL_HOME
+          pytest -n auto tests/nightly/single_card/${{ matrix.model }}
+      - uses: ./.github/actions/upload-artifact-with-job-uuid
+        if: ${{ !cancelled() }}  # still upload reports when an earlier step failed, but not when the run was cancelled
+        with:
+          path: |
+            generated/test_reports/
+          prefix: "test_reports_"
diff --git a/.github/workflows/ttnn-run-sweeps.yaml b/.github/workflows/ttnn-run-sweeps.yaml
@@ -13,6 +13,12 @@ on:
           - add
           - line_all_gather
           - logical_and_
+          - eltwise.subtract.subtract_interleaved
+          - eltwise.subalpha.subalpha_interleaved
+          - eltwise.rsub.rsub_interleaved
+          - eltwise.frac.frac_interleaved
+          - eltwise.ceil.ceil_interleaved
+          - eltwise.trunc.trunc_interleaved
           - matmul.full.matmul_default_block_sharded
           - matmul.full.matmul_default_height_sharded
           - matmul.full.matmul_default_interleaved

diff --git a/CODEOWNERS b/CODEOWNERS
@@ -108,7 +108,7 @@ ttnn/cpp/ttnn/operations/ccl/ @SeanNijjar @cfjchu
 ttnn/cpp/ttnn/operations/pool/ @mywoodstock @shwetankTT @sankarmanoj-tt @pavlejosipovic
 ttnn/cpp/ttnn/operations/conv/ @mywoodstock @shwetankTT @sankarmanoj-tt @pavlejosipovic @bbradelTT
 ttnn/cpp/ttnn/operations/sliding_window/ @mywoodstock @sankarmanoj-tt @pavlejosipovic
-ttnn/cpp/ttnn/operations/data_movement/ @tarafdarTT @sjameelTT @yan-zaretskiy
+ttnn/cpp/ttnn/operations/data_movement/ @tarafdarTT @sjameelTT @yan-zaretskiy @jaykru-tt
 ttnn/cpp/ttnn/operations/matmul/ @TT-BrianLiu @bbradelTT @yugaoTT
 ttnn/cpp/ttnn/operations/eltwise/ @patrickroberts @yan-zaretskiy @eyonland
 ttnn/cpp/ttnn/operations/reduction/ @SeanNijjar @tarafdarTT @sjameelTT

diff --git a/README.md b/README.md
@@ -49,7 +49,6 @@
 | [ResNet-50 (224x224) (data parallel)](./models/demos/tgg/resnet50)          | 1024  | [Two Galaxies](https://tenstorrent.com/hardware/galaxy)  | 128,800 | 448,000    |             |
 | [ViT](./models/demos/grayskull/vit)                                         | 9     | [e150](https://tenstorrent.com/hardware/grayskull)       | 1,360   | 2,000      |             |
 | [Stable Diffusion 1.4 (512x512)](./models/demos/wormhole/stable_diffusion)  | 1     | [n150](https://tenstorrent.com/hardware/wormhole)        | 0.167   | 0.3        |             |
-| [Unet (shallow)](./models/experimental/functional_unet)                     | 2     | [n150](https://tenstorrent.com/hardware/wormhole)        | 51      | 1000       |             |
 
 ## NLPs
 | Model                                               | Batch | Hardware                                           | sen/sec   | Target sen/sec | Release     |
@@ -65,7 +64,7 @@
 For the latest model updates and features, please see [MODEL_UPDATES.md](models/MODEL_UPDATES.md)
 
 ## TT-NN Tech Reports
-- [Advanced Performance Optimizations for Models](./tech_reports/AdvancedPerformanceOperationsForModels/AdvancedPerformanceOptimizationsForModels.md) (updated Sept 11th)
+- [Advanced Performance Optimizations for Models](./tech_reports/AdvancedPerformanceOperationsForModels/AdvancedPerformanceOptimizationsForModels.md) (updated Sept 18th)
 - [Programming Mesh of Devices](./tech_reports/Programming%20Mesh%20of%20Devices/Programming%20Mesh%20of%20Devices%20with%20TT-NN.md) (updated Sept 9th)
 ---
 

diff --git a/build_metal.sh b/build_metal.sh
@@ -61,6 +61,7 @@ show_help() {
     echo "  -m  Enable MemorySanitizer."
     echo "  -s  Enable ThreadSanitizer."
     echo "  -u  Enable UndefinedBehaviorSanitizer."
+    echo "  -p  Enable Tracy profiler."
 }
 
 # Parse CLI options
@@ -72,8 +73,9 @@ enable_msan="OFF"
 enable_tsan="OFF"
 enable_ubsan="OFF"
 build_type="Release"
+enable_profiler="OFF"
 
-while getopts "hectamsub:" opt; do
+while getopts "hectamsub:p" opt; do
     case ${opt} in
         h )
             show_help
@@ -103,6 +105,9 @@ while getopts "hectamsub:" opt; do
         b )
             build_type="$OPTARG"
             ;;
+        p )
+            enable_profiler="ON"
+            ;;
         \? )
             show_help
             exit 1
@@ -125,13 +130,7 @@ echo "Enable MemorySanitizer: $enable_msan"
 echo "Enable ThreadSanitizer: $enable_tsan"
 echo "Enable UndefinedBehaviorSanitizer: $enable_ubsan"
 
-# Create and link the build directory
-mkdir -p build_$build_type
-ln -nsf build_$build_type build
-
-# Prepare cmake arguments
-# -DCXX_INCLUDE_WHAT_YOU_USE=include-what-you-use
-cmake_args="-B build_$build_type -G Ninja -DCMAKE_BUILD_TYPE=$build_type -DCMAKE_EXPORT_COMPILE_COMMANDS=$export_compile_commands"
+build_dir="build_$build_type"
 
 if [ "$enable_ccache" = "ON" ]; then
     cmake_args="$cmake_args -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache"
@@ -157,10 +156,23 @@ if [ "$enable_ubsan" = "ON" ]; then
     cmake_args="$cmake_args -DENABLE_UBSAN=ON"
 fi
 
+if [ "$enable_profiler" = "ON" ]; then
+    cmake_args="$cmake_args -DENABLE_TRACY=ON"
+    build_dir="${build_dir}_tracy"
+fi
+
+# Create and link the build directory
+mkdir -p $build_dir
+ln -nsf $build_dir build
+
+# Prepare cmake arguments
+# -DCXX_INCLUDE_WHAT_YOU_USE=include-what-you-use
+cmake_args="$cmake_args -B $build_dir -G Ninja -DCMAKE_BUILD_TYPE=$build_type -DCMAKE_EXPORT_COMPILE_COMMANDS=$export_compile_commands"
+
 # Configure cmake
 cmake $cmake_args
 
 # Build libraries and cpp tests
 echo "Building libraries and cpp tests"
-cmake --build build_$build_type --target tests      # <- Can also just run `ninja tests -C build`
-cmake --build build_$build_type --target install    # <- This is a general cmake way, can also just run `ninja install -C build`
+cmake --build $build_dir --target tests      # <- Can also just run `ninja tests -C build`
+cmake --build $build_dir --target install    # <- This is a general cmake way, can also just run `ninja install -C build`
diff --git a/create_venv.sh b/create_venv.sh
@@ -26,7 +26,7 @@ $PYTHON_CMD -m venv $PYTHON_ENV_DIR
 source $PYTHON_ENV_DIR/bin/activate
 
 echo "Forcefully using a version of pip that will work with our view of editable installs"
-pip install --force-reinstall pip==20.1.1
+pip install --force-reinstall pip==21.2.4
 
 echo "Setting up virtual env"
 python3 -m pip config set global.extra-index-url https://download.pytorch.org/whl/cpu

diff --git a/docs/source/ttnn/ttnn/dependencies/tt_lib.rst b/docs/source/ttnn/ttnn/dependencies/tt_lib.rst
@@ -34,7 +34,7 @@ New Device Operation
 
     struct <NewOperation> {
         void validate(const std::vector<Tensor> &input_tensors) const;
-        std::vector<Shape> compute_output_shapes(const std::vector<Tensor> &input_tensors) const;
+        std::vector<tt::tt_metal::LegacyShape> compute_output_shapes(const std::vector<Tensor> &input_tensors) const;
         std::vector<Tensor> create_output_tensors(const std::vector<Tensor> &input_tensors) const;
         operation::ProgramWithCallbacks create_program(const std::vector<Tensor>& input_tensors, std::vector<Tensor> &output_tensors) const;
     };
@@ -48,7 +48,7 @@ New Device Operation with a member
         int some_member
 
         void validate(const std::vector<Tensor> &input_tensors) const;
-        std::vector<Shape> compute_output_shapes(const std::vector<Tensor> &input_tensors) const;
+        std::vector<tt::tt_metal::LegacyShape> compute_output_shapes(const std::vector<Tensor> &input_tensors) const;
         std::vector<Tensor> create_output_tensors(const std::vector<Tensor> &input_tensors) const;
         operation::ProgramWithCallbacks create_program(const std::vector<Tensor>& input_tensors, std::vector<Tensor> &output_tensors) const;
     };
@@ -61,7 +61,7 @@ New Device Operation with Optional Input Tensors
     struct <NewOperation> {
         void validate(const std::vector<Tensor> &input_tensors,
             const std::vector<std::optional<const Tensor>>& optional_input_tensors) const;
-        std::vector<Shape> compute_output_shapes(const std::vector<Tensor> &input_tensors) const;
+        std::vector<tt::tt_metal::LegacyShape> compute_output_shapes(const std::vector<Tensor> &input_tensors) const;
         std::vector<Tensor> create_output_tensors(const std::vector<Tensor> &input_tensors) const;
         operation::ProgramWithCallbacks create_program(
             const std::vector<Tensor>& input_tensors,
@@ -80,7 +80,7 @@ and create_output_tensors with the additional parameter for the output_tensors.
 
     struct <NewOperation> {
         void validate_with_output_tensors(const std::vector<Tensor> &input_tensors, const std::vector<std::optional<Tensor>>& output_tensors) const;
-        std::vector<Shape> compute_output_shapes(const std::vector<Tensor> &input_tensors) const;
+        std::vector<tt::tt_metal::LegacyShape> compute_output_shapes(const std::vector<Tensor> &input_tensors) const;
         std::vector<std::optional<Tensor>> create_output_tensors(const std::vector<Tensor> &input_tensors, const std::vector<std::optional<Tensor>>& output_tensors) const;
         operation::ProgramWithOptionalOutputTensors create_program(const std::vector<Tensor>& input_tensors, std::vector<std::optional<Tensor>> &output_tensors) const;
 

diff --git a/infra/data_collection/cicd.py b/infra/data_collection/cicd.py
@@ -41,12 +41,22 @@ def create_cicd_json_for_data_analysis(
 
     workflow_outputs_dir = get_workflow_outputs_dir()
 
-    github_job_id_to_test_reports = get_github_job_id_to_test_reports(workflow_outputs_dir, github_pipeline_id)
+    github_job_ids = []
+    for raw_job in raw_jobs:
+        github_job_id = int(raw_job["github_job_id"])
+        github_job_ids.append(github_job_id)
+
+    github_job_id_to_test_reports = get_github_job_id_to_test_reports(
+        workflow_outputs_dir, github_pipeline_id, github_job_ids
+    )
 
     jobs = []
 
     for raw_job in raw_jobs:
         github_job_id = raw_job["github_job_id"]
+
+        logger.info(f"Processing raw GitHub job {github_job_id}")
+
         test_report_exists = github_job_id in github_job_id_to_test_reports
         if test_report_exists:
             test_report_path = github_job_id_to_test_reports[github_job_id]

diff --git a/infra/data_collection/github/download_cicd_logs_and_artifacts.sh b/infra/data_collection/github/download_cicd_logs_and_artifacts.sh
@@ -27,13 +27,22 @@ download_artifacts() {
 download_logs_for_all_jobs() {
+    # Download the log of every job in a workflow run, once per attempt from 1 to max_attempts.
+    # Arguments: $1 repo as used in /repos/<repo>/ API paths, $2 workflow run id,
+    #            $3 highest attempt number to fetch.
     local repo=$1
     local workflow_run_id=$2
-    local attempt_number=$3
+    local max_attempts=$3
 
+    echo "[info] downloading logs for workflow run with id $workflow_run_id for all attempts up to $max_attempts"
+    for attempt_number in $(seq 1 $max_attempts); do
+        echo "[Info] Downloading for attempt $attempt_number"
 
-    gh api /repos/$repo/actions/runs/$workflow_run_id/attempts/$attempt_number/jobs --paginate | jq '.jobs[].id' | while read -r job_id; do
-        echo "[Info] Download logs for job with ID $job_id"
-        gh api /repos/$repo/actions/jobs/$job_id/logs > generated/cicd/$workflow_run_id/logs/$job_id.log
+        # List this attempt's job ids (one per line via jq) and save each job's log as <job_id>.log.
+        gh api /repos/$repo/actions/runs/$workflow_run_id/attempts/$attempt_number/jobs --paginate | jq '.jobs[].id' | while read -r job_id; do
+            echo "[info] download logs for job with id $job_id, attempt number $attempt_number"
+            gh api /repos/$repo/actions/jobs/$job_id/logs > generated/cicd/$workflow_run_id/logs/$job_id.log
+        done
     done
+
 }
 
 main() {