Adding the new feature of FPDT #6462

Status: Open. Wants to merge 70 commits into base: master.

Commits (70 in total; the file changes shown below are from 53 of them):
c076827  fix the bug of deepspeed sequence parallel working with batch size la… (Aug 3, 2024)
1b8a8c1  Merge branch 'master' into master (samadejacobs, Aug 6, 2024)
ed34e89  apply yapf formatting (Aug 7, 2024)
89b119e  Formatting fixes (loadams, Aug 7, 2024)
7db5798  Merge branch 'microsoft:master' into master (YJHMITWEB, Aug 28, 2024)
0beff24  add FPDT (Aug 29, 2024)
4522ed7  Merge branch 'master' into master (YJHMITWEB, Aug 29, 2024)
c15d1d8  Merge branch 'master' into master (tjruwase, Sep 6, 2024)
69f3892  modify streams (Sep 24, 2024)
8ef9f5a  modify streams (Sep 24, 2024)
b43c5ec  Merge branch 'master' into master (loadams, Sep 27, 2024)
a55d1f5  remove duplication of alltoall (Oct 7, 2024)
1cbd59d  Merge branch 'master' of github.com:YJHMITWEB/DeepSpeed (Oct 7, 2024)
6bfd76f  remove duplication of pos (Oct 7, 2024)
4eeadca  fix format (Oct 7, 2024)
8994991  Merge branch 'master' into master (tohtana, Oct 10, 2024)
128286c  fix format and add unit test for fpdt (Oct 10, 2024)
386f606  Merge branch 'master' of github.com:YJHMITWEB/DeepSpeed (Oct 10, 2024)
ebea5b0  add einops (Oct 10, 2024)
5c8eec8  add flashattn (Oct 11, 2024)
a7e175a  Merge branch 'master' into master (tohtana, Oct 11, 2024)
764a572  add requirements for flash-attn in FPDT (Oct 14, 2024)
14e91b8  Merge branch 'master' of github.com:YJHMITWEB/DeepSpeed (Oct 14, 2024)
77dcd38  Merge branch 'master' into master (tohtana, Oct 17, 2024)
7a5c29c  Merge branch 'master' into master (tohtana, Oct 22, 2024)
534cb93  skip test when fa is unavailable (tohtana, Nov 5, 2024)
972ddda  formatting (tohtana, Nov 5, 2024)
37bc694  add workflow to run a6000 tests (tohtana, Nov 5, 2024)
8f8aaa0  Merge branch 'master' into FPDT (tohtana, Nov 5, 2024)
ac7baf6  revert world sizes for tests (tohtana, Nov 5, 2024)
0d2b624  Merge pull request #1 from YJHMITWEB/tohtana/merge_FPDT (tohtana, Nov 5, 2024)
8935529  update workflow (tohtana, Nov 5, 2024)
edd2e05  update image version (tohtana, Nov 5, 2024)
464d117  remove --no-build-isolation (tohtana, Nov 5, 2024)
7389f66  remove requirements file for flash-attn (tohtana, Nov 5, 2024)
5f859be  remove flash-attn requirements from setup.py (tohtana, Nov 5, 2024)
56cb647  fix pip command (tohtana, Nov 5, 2024)
164f459  modify unit test for fpdt (Nov 5, 2024)
3eb816d  modify unit test for fpdt (Nov 5, 2024)
2ae68dc  modify unit test for fpdt (Nov 5, 2024)
b1b2688  modify unit test for fpdt (Nov 5, 2024)
67aa3df  modify unit test for fpdt (Nov 5, 2024)
42461d2  modify unit test for fpdt (Nov 5, 2024)
d637d60  modify unit test for fpdt (Nov 5, 2024)
907c79d  modify unit test for fpdt (Nov 5, 2024)
8f5d039  modify unit test for fpdt (Nov 5, 2024)
02c2fbf  modify unit test for fpdt (Nov 5, 2024)
f570213  modify unit test for fpdt (Nov 5, 2024)
5b8c419  add condition for using fpdt offloading (Nov 5, 2024)
bd090c8  add condition for using fpdt offloading (Nov 5, 2024)
e48e85b  add flash-attn version check (Nov 5, 2024)
af24777  Merge branch 'master' into master (tohtana, Nov 6, 2024)
ebaf56c  add unit test directory as test trigger (tohtana, Nov 6, 2024)
9e811b8  add cron for test and reporting for nightly CI failures (tohtana, Nov 6, 2024)
a7522da  add multiGPU fpdt unit test (Nov 7, 2024)
209adab  add multiGPU fpdt unit test (Nov 7, 2024)
dbeea8a  add multiGPU fpdt unit test (Nov 7, 2024)
845e42d  add multiGPU fpdt unit test (Nov 7, 2024)
8b2549c  add multiGPU fpdt unit test (Nov 7, 2024)
058c973  add multiGPU fpdt unit test (Nov 7, 2024)
0dcc234  add multiGPU fpdt unit test (Nov 7, 2024)
d1be5d3  add multiGPU fpdt unit test (Nov 7, 2024)
3a0feba  add multiGPU fpdt unit test (Nov 7, 2024)
8c57812  add multiGPU fpdt unit test (Nov 7, 2024)
43decf6  add multiGPU fpdt unit test (Nov 8, 2024)
d39585c  add multiGPU fpdt unit test (Nov 8, 2024)
389b1a3  add multiGPU fpdt unit test (Nov 8, 2024)
958f3bf  add multiGPU fpdt unit test (Nov 8, 2024)
af025c5  add multiGPU fpdt unit test (Nov 8, 2024)
2230377  Merge branch 'master' into master (tohtana, Nov 8, 2024)
File changed: .github/workflows/nv-flash-attn.yml (new file, 53 additions, 0 deletions)

@@ -0,0 +1,53 @@
name: nv-flash-attn

on:
  workflow_dispatch:
  pull_request:
    paths:
      - 'deepspeed/sequence/**'
      - 'tests/unit/sequence_parallelism/**'
      - '.github/workflows/nv-flash-attn.yml'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
[Inline review conversation on the trigger/concurrency section]

loadams (Contributor): Should we add a cron trigger to this as well? And perhaps the nightly CI failure issue creation from here, so that if a PR doesn't trigger this workflow we will still know if it fails?

Reply (Contributor): Thank you @loadams for the suggestion! I added both. Can you check them?

(A hedged sketch of what such an addition could look like follows the workflow file below.)

jobs:
  unit-tests:
    runs-on: [self-hosted, nvidia, a6000]
    container:
      image: nvcr.io/nvidia/pytorch:24.03-py3
      ports:
        - 80
      options: --gpus all --shm-size "8G"

    steps:
      - uses: actions/checkout@v4

      - name: Check container state
        run: |
          ldd --version
          nvcc --version
          nvidia-smi
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
      - name: Install transformers
        run: |
          git clone --depth=1 https://github.com/huggingface/transformers
          cd transformers
          git rev-parse --short HEAD
          python -m pip install .
      - name: Install deepspeed
        run: |
          python -m pip install .[dev]
          ds_report
      - name: Install FlashAttention
        run: |
          python -m pip install flash-attn
      - name: Python environment
        run: |
          python -m pip list
      - name: Unit tests
        run: |
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
          cd tests
          python -m pytest --color=yes --durations=0 --verbose -rF unit/sequence_parallelism/test_ulysses.py --torch_ver="2.3" --cuda_ver="12"
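
To make the review thread above concrete: the cron trigger and nightly-failure reporting that were discussed are not part of the 53-commit diff shown here, so the fragment below is only a rough sketch of how this workflow could gain a nightly schedule and open an issue when a scheduled run fails. The cron time, the job name, the permissions block, and the use of the gh CLI are illustrative assumptions, not the PR's actual implementation.

    # Hypothetical sketch only; not code from this PR.
    on:
      workflow_dispatch:
      pull_request:
        # (the existing paths filter above would stay unchanged)
        paths:
          - 'deepspeed/sequence/**'
          - 'tests/unit/sequence_parallelism/**'
          - '.github/workflows/nv-flash-attn.yml'
      schedule:
        - cron: "0 0 * * *"    # assumed schedule: nightly at 00:00 UTC

    jobs:
      # ... the unit-tests job stays as defined in the file above ...
      report-nightly-failure:
        # Open an issue only when a scheduled (nightly) run of the tests fails,
        # so PR runs do not generate issues.
        if: ${{ failure() && github.event_name == 'schedule' }}
        needs: unit-tests
        runs-on: ubuntu-latest
        permissions:
          issues: write
        steps:
          - name: Create issue for nightly CI failure
            env:
              GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
            run: |
              gh issue create \
                --repo "${{ github.repository }}" \
                --title "Nightly CI failure: ${{ github.workflow }}" \
                --body "Failed run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"

Gating the issue-creation job on the event name keeps ordinary pull-request failures out of the issue tracker while still surfacing regressions that no PR happened to trigger.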
Second file changed:

@@ -369,7 +369,9 @@ def is_activation_to_checkpoint(item):
     Is an activation to be checkpointed
     """
     global mp_size
-    return torch.is_tensor(item) and item.is_floating_point() and item.numel() >= mp_size
+    extra_flag = (not hasattr(item, 'no_checkpointing')) or (hasattr(item, 'no_checkpointing')
+                                                             and item.no_checkpointing == False)
+    return torch.is_tensor(item) and item.is_floating_point() and item.numel() >= mp_size and extra_flag


 def partition_activations(args, cpu_checkpoint, contiguous_checkpoint):
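
The new extra_flag check above lets callers exclude specific tensors from activation checkpointing by tagging them with a no_checkpointing attribute. The snippet below is an illustrative sketch of that contract using a simplified stand-in for the helper; only the attribute name comes from the diff, while the tensor names, shapes, and the local mp_size default are assumptions.

    # Illustrative sketch (assumptions noted above): tagging a tensor with
    # `no_checkpointing = True` makes the patched check skip it.
    import torch


    def is_activation_to_checkpoint(item, mp_size=1):
        # Simplified stand-in for the patched helper in the diff above:
        # missing attribute or False means the tensor is still checkpointed.
        extra_flag = not getattr(item, 'no_checkpointing', False)
        return torch.is_tensor(item) and item.is_floating_point() and item.numel() >= mp_size and extra_flag


    hidden_states = torch.randn(2, 1024, 512)   # a regular activation: still checkpointed
    aux_tensor = torch.randn(2, 1024)           # a tensor we want to keep out of checkpointing

    aux_tensor.no_checkpointing = True          # opt this tensor out

    print(is_activation_to_checkpoint(hidden_states))  # True
    print(is_activation_to_checkpoint(aux_tensor))     # False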