Skip to content

Commit

Permalink
[flang][OpenMP] Privatize "loop-local" values in do concurent on de…
Browse files Browse the repository at this point in the history
…vice

Extends ROCm#112.

This PR extends support for `do concurrent` mapping to the device a bit
more. In particular, it handles localization of loop-local values on the
deive. Previously, this was only supported and tested on the host.

See docs for `looputils::collectLoopLocalValues` for the definition of
"loop-local" values.
  • Loading branch information
ergawy committed Aug 19, 2024
1 parent d2b3307 commit 3790250
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 23 deletions.
21 changes: 14 additions & 7 deletions flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,9 +565,9 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
"defining operation.");
}

llvm::SmallVector<mlir::Value> outermostLoopLives;
looputils::collectLoopLiveIns(doLoop, outermostLoopLives);
assert(!outermostLoopLives.empty());
llvm::SmallVector<mlir::Value> outermostLoopLiveIns;
looputils::collectLoopLiveIns(doLoop, outermostLoopLiveIns);
assert(!outermostLoopLiveIns.empty());

looputils::LoopNestToIndVarMap loopNest;
bool hasRemainingNestedLoops =
Expand All @@ -577,28 +577,35 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
"Some `do concurent` loops are not perfectly-nested. "
"These will be serialzied.");

mlir::IRMapping mapper;

llvm::SetVector<mlir::Value> locals;
looputils::collectLoopLocalValues(loopNest.back().first, locals);
// We do not want to map "loop-local" values to the device through
// `omp.map.info` ops. Therefore, we remove them from the list of live-ins.
outermostLoopLiveIns.erase(llvm::remove_if(outermostLoopLiveIns,
[&](mlir::Value liveIn) {
return locals.contains(liveIn);
}),
outermostLoopLiveIns.end());

looputils::sinkLoopIVArgs(rewriter, loopNest);

mlir::omp::TargetOp targetOp;
mlir::omp::LoopNestOperands loopNestClauseOps;

mlir::IRMapping mapper;

if (mapToDevice) {
mlir::omp::TargetOperands targetClauseOps;

// The outermost loop will contain all the live-in values in all nested
// loops since live-in values are collected recursively for all nested
// ops.
for (mlir::Value liveIn : outermostLoopLives)
for (mlir::Value liveIn : outermostLoopLiveIns)
targetClauseOps.mapVars.push_back(
genMapInfoOpForLiveIn(rewriter, liveIn));

targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper,
outermostLoopLives, targetClauseOps);
outermostLoopLiveIns, targetClauseOps);
genTeamsOp(doLoop.getLoc(), rewriter);
}

Expand Down
44 changes: 28 additions & 16 deletions flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
! occur due to multiple teams trying to access the same allocation.

! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host %s -o - \
! RUN: | FileCheck %s
! RUN: | FileCheck %s --check-prefixes=COMMON

! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \
! RUN: | FileCheck %s --check-prefixes=COMMON,DEVICE

module struct_mod
type test_struct
Expand Down Expand Up @@ -49,18 +52,27 @@ program main
print *, "total =", total
end program main

! CHECK: omp.parallel {
! CHECK: %[[LOCAL_TEMP:.*]] = fir.alloca !fir.type<_QMstruct_modTtest_struct{x_:!fir.box<!fir.heap<i32>>}> {bindc_name = ".result"}
! CHECK: omp.wsloop {
! CHECK: omp.loop_nest {{.*}} {
! CHECK: %[[TEMP_VAL:.*]] = fir.call @_QMstruct_modPconstruct_from_components
! CHECK: fir.save_result %[[TEMP_VAL]] to %[[LOCAL_TEMP]]
! CHECK: %[[EMBOXED_LOCAL:.*]] = fir.embox %[[LOCAL_TEMP]]
! CHECK: %[[CONVERTED_LOCAL:.*]] = fir.convert %[[EMBOXED_LOCAL]]
! CHECK: fir.call @_FortranADestroy(%[[CONVERTED_LOCAL]])
! CHECK: omp.yield
! CHECK: }
! CHECK: omp.terminator
! CHECK: }
! CHECK: omp.terminator
! CHECK: }
! DEVICE: omp.target {{.*}} {
! DEVICE: omp.teams {
! COMMON: omp.parallel {
! COMMON: %[[LOCAL_TEMP:.*]] = fir.alloca !fir.type<_QMstruct_modTtest_struct{x_:!fir.box<!fir.heap<i32>>}> {bindc_name = ".result"}
! DEVICE: omp.distribute {
! COMMON: omp.wsloop {
! COMMON: omp.loop_nest {{.*}} {
! COMMON: %[[TEMP_VAL:.*]] = fir.call @_QMstruct_modPconstruct_from_components
! COMMON: fir.save_result %[[TEMP_VAL]] to %[[LOCAL_TEMP]]
! COMMON: %[[EMBOXED_LOCAL:.*]] = fir.embox %[[LOCAL_TEMP]]
! COMMON: %[[CONVERTED_LOCAL:.*]] = fir.convert %[[EMBOXED_LOCAL]]
! COMMON: fir.call @_FortranADestroy(%[[CONVERTED_LOCAL]])
! COMMON: omp.yield
! COMMON: }
! COMMON: omp.terminator
! COMMON: }
! DEVICE: omp.terminator
! DEVICE: }
! COMMON: omp.terminator
! COMMON: }
! DEVICE: omp.terminator
! DEVICE: }
! DEVICE: omp.terminator
! DEVICE: }

0 comments on commit 3790250

Please sign in to comment.