-
Notifications
You must be signed in to change notification settings - Fork 745
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SYCL] Enable sub group masks for 64 bit subgroups (#7491)
This patch adds group ballot support for HIP (based on initial work from @abagusetty in #6734) and extends the sub-group mask implementation to support 64-bit masks, since many AMD GPUs use 64-wide wavefronts. Related to issue #6718.
- Loading branch information
Showing
4 changed files
with
68 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include <spirv/spirv.h> | ||
#include <spirv/spirv_types.h> | ||
|
||
// Numeric code of the integer "not equal" comparison predicate, copied from | ||
// llvm/include/llvm/IR/InstrTypes.h. It is passed as the predicate argument | ||
// of __builtin_amdgcn_uicmp below. | ||
#define ICMP_NE 33 | ||
|
||
_CLC_DEF _CLC_CONVERGENT __clc_vec4_uint32_t | ||
_Z29__spirv_GroupNonUniformBallotjb(unsigned flag, bool predicate) { | ||
// only support subgroup for now | ||
if (flag != Subgroup) { | ||
__builtin_trap(); | ||
__builtin_unreachable(); | ||
} | ||
|
||
// prepare result, we only support the ballot operation on 64 threads maximum | ||
// so we only need the first two elements to represent the final mask | ||
__clc_vec4_uint32_t res; | ||
res[2] = 0; | ||
res[3] = 0; | ||
|
||
// run the ballot operation | ||
res.xy = __builtin_amdgcn_uicmp((int)predicate, 0, ICMP_NE); | ||
|
||
return res; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters