Skip to content

Commit

Permalink
multiheadattention scale param (#5526)
Browse files Browse the repository at this point in the history
* update swiftshader

* skip vs2017 swiftshader
  • Loading branch information
nihui authored Jun 23, 2024
1 parent f2a34ee commit 4c3deba
Show file tree
Hide file tree
Showing 14 changed files with 81 additions and 36 deletions.
4 changes: 2 additions & 2 deletions .ci/test-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ jobs:
uses: cache@1.*
with:
cachePaths: swiftshader-install
cacheKey: swiftshader-linux-install-20230420-1
cacheKey: swiftshader-linux-install-20240622

- name: checkout-swiftshader
if: steps.cache-swiftshader.outputs.cacheHit != 'true'
  checkout: https://github.com/google/swiftshader.git
with:
pullType: COMMIT_ID
refName: dd55e592406dc0bae219df11adec6363840aff4a
refName: de870ac7518fe2b6bb651ecc22fc36647cf7b986
localPath: swiftshader
enableSubmodule: false
enableGitLfs: false
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/linux-x64-gpu-clang.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ jobs:
uses: actions/cache@v4
with:
path: swiftshader-install
key: swiftshader-linux-install-20230420
key: swiftshader-linux-install-20240622
- name: checkout-swiftshader
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: google/swiftshader
path: swiftshader
ref: dd55e592406dc0bae219df11adec6363840aff4a
ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
- name: checkout-swiftshader-submodules
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/linux-x64-gpu-gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ jobs:
uses: actions/cache@v4
with:
path: swiftshader-install
key: swiftshader-linux-install-20230420
key: swiftshader-linux-install-20240622
- name: checkout-swiftshader
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: google/swiftshader
path: swiftshader
ref: dd55e592406dc0bae219df11adec6363840aff4a
ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
- name: checkout-swiftshader-submodules
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,14 @@ jobs:
uses: actions/cache@v4
with:
path: swiftshader-install
key: swiftshader-macos-install-20230420
key: swiftshader-macos-install-20240622
- name: checkout-swiftshader
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: google/swiftshader
path: swiftshader
ref: dd55e592406dc0bae219df11adec6363840aff4a
ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
- name: checkout-swiftshader-submodules
if: steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ jobs:
uses: actions/cache@v4
with:
path: swiftshader-install
key: swiftshader-linux-install-20230420
key: swiftshader-linux-install-20240622
- name: checkout-swiftshader
if: matrix.os == 'ubuntu-latest' && steps.cache-swiftshader.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: google/swiftshader
path: swiftshader
ref: dd55e592406dc0bae219df11adec6363840aff4a
ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
- name: checkout-swiftshader-submodules
if: matrix.os == 'ubuntu-latest' && steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |
Expand Down
51 changes: 51 additions & 0 deletions .github/workflows/test-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,62 @@ jobs:
lcov -r lcov.info '*/install/*' -o lcov.info
lcov -r lcov.info '*/build/*' -o lcov.info
lcov --list lcov.info
- name: codecov
id: codecov
continue-on-error: true
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: codecov-vlen256-retry-1
continue-on-error: true
id: codecov-vlen256-retry-1
if: steps.codecov.outcome=='failure'
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: codecov-vlen256-retry-2
continue-on-error: true
id: codecov-vlen256-retry-2
if: steps.codecov-vlen256-retry-1.outcome=='failure'
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: codecov-vlen256-retry-3
continue-on-error: true
id: codecov-vlen256-retry-3
if: steps.codecov-vlen256-retry-2.outcome=='failure'
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: codecov-vlen256-retry-4
continue-on-error: true
id: codecov-vlen256-retry-4
if: steps.codecov-vlen256-retry-3.outcome=='failure'
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: codecov-vlen256-retry-5
continue-on-error: true
id: codecov-vlen256-retry-5
if: steps.codecov-vlen256-retry-4.outcome=='failure'
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: build/lcov.info
- name: set the status
if: always()
run: |
if ${{ steps.codecov.outcome=='success' || steps.codecov-vlen256-retry-1.outcome=='success' || steps.codecov-vlen256-retry-2.outcome=='success' || steps.codecov-vlen256-retry-3.outcome=='success' || steps.codecov-vlen256-retry-4.outcome=='success' || steps.codecov-vlen256-retry-5.outcome=='success' }}; then
echo fine
else
exit 1
fi
linux-gcc-x64-avx512-spr:
runs-on: ubuntu-22.04
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,26 +79,26 @@ jobs:
cmake --build . --config Release -j 4
cmake --build . --config Release --target install
- name: cache-swiftshader
if: matrix.vs-version != 'vs2015'
if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017'
id: cache-swiftshader
uses: actions/cache@v4
with:
path: swiftshader-install
key: swiftshader-${{ matrix.vs-version }}-x64-install-20230420
key: swiftshader-${{ matrix.vs-version }}-x64-install-20240622
- name: checkout-swiftshader
if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true'
if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: google/swiftshader
path: swiftshader
ref: dd55e592406dc0bae219df11adec6363840aff4a
ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
- name: checkout-swiftshader-submodules
if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true'
if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |
cd swiftshader
git -c submodule."third_party/git-hooks".update=none submodule update --init --recursive
- name: swiftshader
if: matrix.vs-version != 'vs2015' && steps.cache-swiftshader.outputs.cache-hit != 'true'
if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017' && steps.cache-swiftshader.outputs.cache-hit != 'true'
run: |
cd swiftshader
mkdir build-${{ matrix.vs-version }}; cd build-${{ matrix.vs-version }}
Expand All @@ -113,7 +113,7 @@ jobs:
cmake -T ${{ matrix.toolset-version }},host=x64 -A x64 -Dprotobuf_DIR="$env:GITHUB_WORKSPACE\protobuf-install\cmake" -DNCNN_VULKAN=ON -DNCNN_BUILD_TESTS=ON ..
cmake --build . --config Release -j 4
- name: x64-test
if: matrix.vs-version != 'vs2015'
if: matrix.vs-version != 'vs2015' && matrix.vs-version != 'vs2017'
run: |
echo "[Processor]`nThreadCount=1`n" > build-x64/tests/Release/SwiftShader.ini
Copy-Item -Path "$env:GITHUB_WORKSPACE\swiftshader-install\vulkan-1.dll" -Destination 'build-x64\tests'
Expand Down
9 changes: 5 additions & 4 deletions docs/developer-guide/operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -1266,21 +1266,22 @@ y = affine(out)
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | embed_dim | int | 0 | |
| 1 | num_heads | int | 1 | |
| 2 | weight_data_size| int | 0 | |
| 2 | weight_data_size| int | 0 | qdim = weight_data_size / embed_dim |
| 3 | kdim | int | embed_dim | |
| 4 | vdim | int | embed_dim | |
| 5 | attn_mask | int | 0 | |
| 6 | scale | float | 1.f / sqrt(embed_dim / num_heads) | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
| q_weight_data | float/fp16/int8 | [weight_data_size] |
| q_weight_data | float/fp16/int8 | [embed_dim * qdim] |
| q_bias_data | float | [embed_dim] |
| k_weight_data | float/fp16/int8 | [embed_dim * kdim] |
| k_bias_data | float | [embed_dim] |
| v_weight_data | float/fp16/int8 | [embed_dim * vdim] |
| v_bias_data | float | [embed_dim] |
| out_weight_data| float/fp16/int8 | [weight_data_size] |
| out_bias_data | float | [embed_dim] |
| out_weight_data| float/fp16/int8 | [qdim * embed_dim] |
| out_bias_data | float | [qdim] |

# MVN
```
Expand Down
5 changes: 1 addition & 4 deletions src/layer/arm/multiheadattention_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,9 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt)
const int qdim = weight_data_size / embed_dim;

{
const int embed_dim_per_head = embed_dim / num_heads;
const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

q_gemm = ncnn::create_layer_cpu(ncnn::LayerType::Gemm);
ncnn::ParamDict pd;
pd.set(0, inv_sqrt_embed_dim_per_head);
pd.set(0, scale);
pd.set(1, 1.f);
pd.set(2, 0); // transA
pd.set(3, 1); // transB
Expand Down
7 changes: 3 additions & 4 deletions src/layer/multiheadattention.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ int MultiHeadAttention::load_param(const ParamDict& pd)
kdim = pd.get(3, embed_dim);
vdim = pd.get(4, embed_dim);
attn_mask = pd.get(5, 0);
scale = pd.get(6, 1.f / sqrtf(embed_dim / num_heads));

return 0;
}
Expand Down Expand Up @@ -111,12 +112,10 @@ int MultiHeadAttention::forward(const std::vector<Mat>& bottom_blobs, std::vecto
if (xqkv.empty())
return -100;

const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < num_heads; q++)
{
// xq = affine(q) * inv_sqrt_embed_dim_per_head
// xq = affine(q) * scale
{
Mat outm = xq.channel(q);

Expand All @@ -135,7 +134,7 @@ int MultiHeadAttention::forward(const std::vector<Mat>& bottom_blobs, std::vecto
sum += *ptr++ * *kptr++;
}

outptr[j] = sum * inv_sqrt_embed_dim_per_head;
outptr[j] = sum * scale;
}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/layer/multiheadattention.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class MultiHeadAttention : public Layer
int kdim;
int vdim;
int attn_mask;
float scale;

Mat q_weight_data;
Mat q_bias_data;
Expand Down
4 changes: 1 addition & 3 deletions src/layer/vulkan/multiheadattention_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,10 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt)
const int embed_dim_per_head = embed_dim / num_heads;
const int qdim = weight_data_size / embed_dim;
{
const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

q_gemm = ncnn::create_layer_vulkan(ncnn::LayerType::Gemm);
q_gemm->vkdev = vkdev;
ncnn::ParamDict pd;
pd.set(0, inv_sqrt_embed_dim_per_head);
pd.set(0, scale);
pd.set(1, 1.f);
pd.set(2, 0); // transA
pd.set(3, 1); // transB
Expand Down
5 changes: 1 addition & 4 deletions src/layer/x86/multiheadattention_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,9 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt)
const int qdim = weight_data_size / embed_dim;

{
const int embed_dim_per_head = embed_dim / num_heads;
const float inv_sqrt_embed_dim_per_head = 1.f / sqrtf(embed_dim_per_head);

q_gemm = ncnn::create_layer_cpu(ncnn::LayerType::Gemm);
ncnn::ParamDict pd;
pd.set(0, inv_sqrt_embed_dim_per_head);
pd.set(0, scale);
pd.set(1, 1.f);
pd.set(2, 0); // transA
pd.set(3, 1); // transB
Expand Down
1 change: 1 addition & 0 deletions tests/test_multiheadattention.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ static int test_multiheadattention_sameqkv(const ncnn::Mat& a, int embed_dim, in
pd.set(2, embed_dim * qdim);
pd.set(3, qdim);
pd.set(4, qdim);
pd.set(6, 0.7f / sqrtf(embed_dim / num_heads));

std::vector<ncnn::Mat> weights(8);
weights[0] = RandomMat(embed_dim * qdim);
Expand Down

0 comments on commit 4c3deba

Please sign in to comment.