diff --git a/src/Milhoja_ThreadTeam.cpp b/src/Milhoja_ThreadTeam.cpp index b396be5c..5f4a5e59 100644 --- a/src/Milhoja_ThreadTeam.cpp +++ b/src/Milhoja_ThreadTeam.cpp @@ -1517,7 +1517,7 @@ void* ThreadTeam::threadRoutine(void* varg) { if (auto tileWrapperPrototype = dynamic_cast(team->receiverPrototype_)) { // NOTE: this is the case where dataItem is a TilwWrapper, - // and the team->dataReceiver_ is another TileWrapper. + // and the team->receiverPrototype_ is another TileWrapper. // Need to transfer dataItem initialized with data receiver's // tileProtoType, as it may differ. // TODO: very dirty ownership transfers diff --git a/tools/milhoja_pypkg/src/milhoja/TaskFunctionGenerator_OpenACC_F.py b/tools/milhoja_pypkg/src/milhoja/TaskFunctionGenerator_OpenACC_F.py index b5bddf22..e461191b 100644 --- a/tools/milhoja_pypkg/src/milhoja/TaskFunctionGenerator_OpenACC_F.py +++ b/tools/milhoja_pypkg/src/milhoja/TaskFunctionGenerator_OpenACC_F.py @@ -148,7 +148,7 @@ def generate_source_code(self, destination, overwrite): interface = interface.rstrip(".F90") fptr.write(f"{INDENT*2}use {interface}, ONLY : {subroutine}\n") offloading.append("#ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP\n") - offloading.append(f"{INDENT*2}!$acc routine ({subroutine}) vector\n") + offloading.append(f"{INDENT*2}!$acc routine ({self._get_wrapper_name(subroutine)}) vector\n") offloading.append("#endif\n") fptr.writelines(["\n", *offloading, "\n"]) # No implicit variables @@ -245,6 +245,7 @@ def generate_source_code(self, destination, overwrite): # Data packet sent on dataQ_h current_queues = ["dataQ_h"] + subroutine_wrappers = {} for node in self._tf_spec.internal_subroutine_graph: # Insert waits if needed before next round of kernel launches extras = [f"queue{i}_h" for i in range(2, len(node) + 1)] @@ -288,33 +289,25 @@ def generate_source_code(self, destination, overwrite): current_queues = next_queues.copy() assert len(current_queues) == len(node) for subroutine, queue in zip(node, 
current_queues): + # subroutine wrapper + # to prevent passing a slice of array + # which may introduce unnecessary device to host maps + wrapper_name, wrapper_lines = self._generate_subroutine_wrapper(INDENT, subroutine) + subroutine_wrappers[wrapper_name] = wrapper_lines + fptr.write(f"{INDENT*2}!$acc parallel loop gang default(none) &\n") fptr.write(f"{INDENT*2}!$acc& async({queue})\n") fptr.write(f"{INDENT*2}do n = 1, nTiles_d\n") - fptr.write(f"{INDENT*3}CALL {subroutine}( &\n") + fptr.write(f"{INDENT*3}CALL {wrapper_name}( &\n") actual_args = \ self._tf_spec.subroutine_actual_arguments(subroutine) - arg_list = [] + arg_list = [f"{INDENT*5}n"] for argument in actual_args: spec = self._tf_spec.argument_specification(argument) - extents = "" offs = "" - if spec["source"] in points: - extents = "(:, n)" - elif spec["source"] == TILE_DELTAS_ARGUMENT: - extents = "(:, n)" - elif spec["source"] == TILE_LEVEL_ARGUMENT: - extents = "(1, n)" + if spec["source"] == TILE_LEVEL_ARGUMENT: offs = " + 1" - elif spec["source"] in bounds: - extents = "(:, :, n)" - elif spec["source"] == GRID_DATA_ARGUMENT: - extents = "(:, :, :, :, n)" - elif spec["source"] == SCRATCH_ARGUMENT: - dimension = len(parse_extents(spec["extents"])) - tmp = [":" for _ in range(dimension)] - extents = "(" + ", ".join(tmp) + ", n)" - arg_list.append(f"{INDENT*5}{argument}_d{extents}{offs}") + arg_list.append(f"{INDENT*5}{argument}_d{offs}") fptr.write(", &\n".join(arg_list) + " &\n") fptr.write(f"{INDENT*5})\n") fptr.write(f"{INDENT*2}end do\n") @@ -344,5 +337,156 @@ def generate_source_code(self, destination, overwrite): # End subroutine declaration fptr.write(f"{INDENT}end subroutine {self._tf_spec.function_name}\n") fptr.write("\n") + + # Write subroutine wrappers + for wrapper, lines in subroutine_wrappers.items(): + for line in lines: + fptr.write(line + "\n") + # End module declaration fptr.write(f"end module {module}\n\n") + + def _get_wrapper_name(self, subroutine): + """ + A helper function 
to determine the name of subroutine wrapper, consistently + """ + return "wrapper_" + subroutine + + def _generate_subroutine_wrapper(self, indent, subroutine): + """ + A helper function to generate a subroutine wrapper + """ + subroutine_wrapper = self._get_wrapper_name(subroutine) + lines = [] + + actual_args = self._tf_spec.subroutine_actual_arguments(subroutine) + dummy_args = ["nblk"] + [f"{arg}_d" for arg in actual_args] + + lines.append(f"{indent*1}subroutine {subroutine_wrapper} ( &") + dummy_arg_str = f"{indent*5}" + f", &\n{indent*5}".join(dummy_args) + f" &\n{indent*3})\n" + dummy_arg_str = "()\n" if len(dummy_args) == 0 else dummy_arg_str + lines.append(dummy_arg_str) + + interface = self._tf_spec.subroutine_interface_file(subroutine).strip() + interface = interface[:-4] if interface.endswith(".F90") else interface + lines.append(f"{indent*2}use {interface}, ONLY: {subroutine}") + lines.append("") + + lines.append(f"{indent*2}!$acc routine vector") + lines.append(f"{indent*2}!$acc routine ({subroutine}) vector") + lines.append("") + + lines.append(f"{indent*2}implicit none") + lines.append("") + + lines.append(f"{indent*2}! Arguments") + lines.append(f"{indent*2}integer, intent(IN) :: nblk") + + points = { + TILE_LO_ARGUMENT, TILE_HI_ARGUMENT, TILE_LBOUND_ARGUMENT, + TILE_UBOUND_ARGUMENT, LBOUND_ARGUMENT + } + bounds = {TILE_INTERIOR_ARGUMENT, TILE_ARRAY_BOUNDS_ARGUMENT} + pointer_extents = {} + pointer_types = {} + for arg in actual_args: + spec = self._tf_spec.argument_specification(arg) + src = spec["source"] + if src == EXTERNAL_ARGUMENT: + extents = spec["extents"] + if extents != "()": + msg = "No test case for non-scalar externals" + raise NotImplementedError(msg) + + # is this okay? Should we fail if there is no type mapping? 
+ arg_type = C2F_TYPE_MAPPING.get(spec["type"], spec["type"]) + pointer_extents[arg] = 0 + pointer_types[arg] = arg_type + lines.append(f"{indent*2}{arg_type}, target, intent(IN) :: {arg}_d") + + elif src in points: + pointer_extents[arg] = 1 + pointer_types[arg] = "integer" + lines.append(f"{indent*2}integer, target, intent(IN) :: {arg}_d(:, :)") + + elif src == TILE_DELTAS_ARGUMENT: + pointer_extents[arg] = 1 + pointer_types[arg] = "real" + lines.append(f"{indent*2}real, target, intent(IN) :: {arg}_d(:, :)") + + elif src in bounds: + pointer_extents[arg] = 2 + pointer_types[arg] = "integer" + lines.append(f"{indent*2}integer, target, intent(IN) :: {arg}_d(:, :, :)") + + elif src == TILE_LEVEL_ARGUMENT: + pointer_extents[arg] = 1 + pointer_types[arg] = "integer" + lines.append(f"{indent*2}integer, target, intent(IN) :: {arg}_d(:, :)") + + elif src == GRID_DATA_ARGUMENT: + if arg in self._tf_spec.tile_in_arguments: + intent = "IN" + elif arg in self._tf_spec.tile_in_out_arguments: + intent = "INOUT" + elif arg in self._tf_spec.tile_out_arguments: + intent = "OUT" + else: + raise LogicError("Unknown grid data variable class") + + pointer_extents[arg] = 4 + pointer_types[arg] = "real" + lines.append(f"{indent*2}real, target, intent({intent}) :: {arg}_d(:, :, :, :, :)") + + elif src == SCRATCH_ARGUMENT: + arg_type = spec["type"] + dimension = len(parse_extents(spec["extents"])) + assert dimension > 0 + tmp = [":" for _ in range(dimension + 1)] + array = "(" + ", ".join(tmp) + ")" + pointer_extents[arg] = len(tmp) - 1 + pointer_types[arg] = arg_type + lines.append(f"{indent*2}{arg_type}, target, intent(INOUT) :: {arg}_d{array}") + + else: + raise LogicError(f"{arg} of unknown argument class") + lines.append("") + + lines.append(f"{indent*2}! 
Local variables") + pointer_mapping = {} + for arg in actual_args: + spec = self._tf_spec.argument_specification(arg) + arg_p = f"{arg}_d_p" + + ptr_type = pointer_types[arg] + ptr_extents = pointer_extents[arg] + + if ptr_extents > 0: + pointer_mapping[arg] = arg_p + _ext_str = ", ".join([":"] * ptr_extents) + _line = f"{indent*2}{ptr_type}, pointer :: {arg_p}({_ext_str})" + lines.append(_line) + lines.append("") + + lines.append(f"{indent*2}! Attach pointers") + for arg, ptr in pointer_mapping.items(): + ptr_extents = pointer_extents[arg] + _ext_str = ", ".join([":"] * ptr_extents) + ", nblk" + _line = f"{indent*2}{ptr} => {arg}_d({_ext_str})" + lines.append(_line) + lines.append("") + + lines.append(f"{indent*2}! Call subroutine") + lines.append(f"{indent*2}CALL {subroutine}( &") + arg_list = [] + for arg in actual_args: + _arg = pointer_mapping[arg] if arg in pointer_mapping else f"{arg}_d" + arg_list.append(_arg) + lines.append(f"{indent*5}" + f", &\n{indent*5}".join(arg_list) + " &") + lines.append(f"{indent*4})") + + lines.append("") + lines.append(f"{indent*1}end subroutine {subroutine_wrapper}") + lines.append("") + + return subroutine_wrapper, lines diff --git a/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydroFC_2D.F90 b/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydroFC_2D.F90 index 4df48a78..c331d4b7 100644 --- a/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydroFC_2D.F90 +++ b/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydroFC_2D.F90 @@ -41,16 +41,16 @@ subroutine gpu_tf_hydroFC_Fortran( & use dr_cg_hydroAdvance_mod, ONLY : Hydro_updateSolutionHll_gpu_oacc #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeSoundSpeedHll_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeSoundSpeedHll_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_X_gpu_oacc) vector + !$acc routine 
(wrapper_Hydro_computeFluxesHll_X_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_Y_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeFluxesHll_Y_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_updateSolutionHll_gpu_oacc) vector + !$acc routine (wrapper_Hydro_updateSolutionHll_gpu_oacc) vector #endif implicit none @@ -87,56 +87,60 @@ subroutine gpu_tf_hydroFC_Fortran( & !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_computeSoundSpeedHll_gpu_oacc( & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - CC_1_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n) & - ) + CALL wrapper_Hydro_computeSoundSpeedHll_gpu_oacc( & + n, & + tile_lo_d, & + tile_hi_d, & + CC_1_d, & + hydro_op1_auxc_d & + ) end do !$acc end parallel loop !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_computeFluxesHll_X_gpu_oacc( & - dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - CC_1_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n), & - FLX_1_d(:, :, :, :, n) & - ) + CALL wrapper_Hydro_computeFluxesHll_X_gpu_oacc( & + n, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + FLX_1_d & + ) end do !$acc end parallel loop !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_computeFluxesHll_Y_gpu_oacc( & - dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - CC_1_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n), & - FLY_1_d(:, :, :, :, n) & - ) + CALL wrapper_Hydro_computeFluxesHll_Y_gpu_oacc( & + n, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + FLY_1_d & + ) end do !$acc end parallel loop !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_updateSolutionHll_gpu_oacc( & - 
tile_lo_d(:, n), & - tile_hi_d(:, n), & - FLX_1_d(:, :, :, :, n), & - FLY_1_d(:, :, :, :, n), & - FLZ_1_d(:, :, :, :, n), & - CC_1_d(:, :, :, :, n) & - ) + CALL wrapper_Hydro_updateSolutionHll_gpu_oacc( & + n, & + tile_lo_d, & + tile_hi_d, & + FLX_1_d, & + FLY_1_d, & + FLZ_1_d, & + CC_1_d & + ) end do !$acc end parallel loop @@ -147,4 +151,216 @@ subroutine gpu_tf_hydroFC_Fortran( & !$acc end data end subroutine gpu_tf_hydroFC_Fortran + subroutine wrapper_Hydro_computeSoundSpeedHll_gpu_oacc ( & + nblk, & + tile_lo_d, & + tile_hi_d, & + CC_1_d, & + hydro_op1_auxc_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeSoundSpeedHll_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeSoundSpeedHll_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + + ! Call subroutine + CALL Hydro_computeSoundSpeedHll_gpu_oacc( & + tile_lo_d_p, & + tile_hi_d_p, & + CC_1_d_p, & + hydro_op1_auxc_d_p & + ) + + end subroutine wrapper_Hydro_computeSoundSpeedHll_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_X_gpu_oacc ( & + nblk, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + FLX_1_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_X_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_X_gpu_oacc) vector + + implicit none + + ! 
Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + real, target, intent(OUT) :: FLX_1_d(:, :, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + real, pointer :: FLX_1_d_p(:, :, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + FLX_1_d_p => FLX_1_d(:, :, :, :, nblk) + + ! Call subroutine + CALL Hydro_computeFluxesHll_X_gpu_oacc( & + dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + CC_1_d_p, & + hydro_op1_auxc_d_p, & + FLX_1_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_X_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_Y_gpu_oacc ( & + nblk, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + FLY_1_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_Y_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_Y_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + real, target, intent(OUT) :: FLY_1_d(:, :, :, :, :) + + ! 
Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + real, pointer :: FLY_1_d_p(:, :, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + FLY_1_d_p => FLY_1_d(:, :, :, :, nblk) + + ! Call subroutine + CALL Hydro_computeFluxesHll_Y_gpu_oacc( & + dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + CC_1_d_p, & + hydro_op1_auxc_d_p, & + FLY_1_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_Y_gpu_oacc + + subroutine wrapper_Hydro_updateSolutionHll_gpu_oacc ( & + nblk, & + tile_lo_d, & + tile_hi_d, & + FLX_1_d, & + FLY_1_d, & + FLZ_1_d, & + CC_1_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_updateSolutionHll_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_updateSolutionHll_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(OUT) :: FLX_1_d(:, :, :, :, :) + real, target, intent(OUT) :: FLY_1_d(:, :, :, :, :) + real, target, intent(OUT) :: FLZ_1_d(:, :, :, :, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: FLX_1_d_p(:, :, :, :) + real, pointer :: FLY_1_d_p(:, :, :, :) + real, pointer :: FLZ_1_d_p(:, :, :, :) + real, pointer :: CC_1_d_p(:, :, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + FLX_1_d_p => FLX_1_d(:, :, :, :, nblk) + FLY_1_d_p => FLY_1_d(:, :, :, :, nblk) + FLZ_1_d_p => FLZ_1_d(:, :, :, :, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + + ! 
Call subroutine + CALL Hydro_updateSolutionHll_gpu_oacc( & + tile_lo_d_p, & + tile_hi_d_p, & + FLX_1_d_p, & + FLY_1_d_p, & + FLZ_1_d_p, & + CC_1_d_p & + ) + + end subroutine wrapper_Hydro_updateSolutionHll_gpu_oacc + end module gpu_tf_hydroFC_mod diff --git a/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydro_2D.F90 b/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydro_2D.F90 index 3e07d151..7237d52a 100644 --- a/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydro_2D.F90 +++ b/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydro_2D.F90 @@ -41,16 +41,16 @@ subroutine gpu_tf_hydro_Fortran( & use dr_cg_hydroAdvance_mod, ONLY : Hydro_updateSolutionHll_gpu_oacc #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeSoundSpeedHll_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeSoundSpeedHll_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_X_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeFluxesHll_X_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_Y_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeFluxesHll_Y_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_updateSolutionHll_gpu_oacc) vector + !$acc routine (wrapper_Hydro_updateSolutionHll_gpu_oacc) vector #endif implicit none @@ -87,56 +87,60 @@ subroutine gpu_tf_hydro_Fortran( & !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_computeSoundSpeedHll_gpu_oacc( & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - U_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n) & - ) + CALL wrapper_Hydro_computeSoundSpeedHll_gpu_oacc( & + n, & + tile_lo_d, & + tile_hi_d, & + U_d, & + hydro_op1_auxc_d & + ) end do !$acc end parallel loop !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL 
Hydro_computeFluxesHll_X_gpu_oacc( & - dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - U_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n), & - hydro_op1_flX_d(:, :, :, :, n) & - ) + CALL wrapper_Hydro_computeFluxesHll_X_gpu_oacc( & + n, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + U_d, & + hydro_op1_auxc_d, & + hydro_op1_flX_d & + ) end do !$acc end parallel loop !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_computeFluxesHll_Y_gpu_oacc( & + CALL wrapper_Hydro_computeFluxesHll_Y_gpu_oacc( & + n, & dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - U_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n), & - hydro_op1_flY_d(:, :, :, :, n) & - ) + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + U_d, & + hydro_op1_auxc_d, & + hydro_op1_flY_d & + ) end do !$acc end parallel loop !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_updateSolutionHll_gpu_oacc( & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - hydro_op1_flX_d(:, :, :, :, n), & - hydro_op1_flY_d(:, :, :, :, n), & - hydro_op1_flZ_d(:, :, :, :, n), & - U_d(:, :, :, :, n) & - ) + CALL wrapper_Hydro_updateSolutionHll_gpu_oacc( & + n, & + tile_lo_d, & + tile_hi_d, & + hydro_op1_flX_d, & + hydro_op1_flY_d, & + hydro_op1_flZ_d, & + U_d & + ) end do !$acc end parallel loop @@ -147,5 +151,217 @@ subroutine gpu_tf_hydro_Fortran( & !$acc end data end subroutine gpu_tf_hydro_Fortran + subroutine wrapper_Hydro_computeSoundSpeedHll_gpu_oacc ( & + nblk, & + tile_lo_d, & + tile_hi_d, & + U_d, & + hydro_op1_auxc_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeSoundSpeedHll_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeSoundSpeedHll_gpu_oacc) vector + + implicit none + + ! 
Arguments + integer, intent(IN) :: nblk + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(INOUT) :: U_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: U_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + U_d_p => U_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + + ! Call subroutine + CALL Hydro_computeSoundSpeedHll_gpu_oacc( & + tile_lo_d_p, & + tile_hi_d_p, & + U_d_p, & + hydro_op1_auxc_d_p & + ) + + end subroutine wrapper_Hydro_computeSoundSpeedHll_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_X_gpu_oacc ( & + nblk, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + U_d, & + hydro_op1_auxc_d, & + hydro_op1_flX_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_X_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_X_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: U_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flX_d(:, :, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: U_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + real, pointer :: hydro_op1_flX_d_p(:, :, :, :) + + ! 
Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + U_d_p => U_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + hydro_op1_flX_d_p => hydro_op1_flX_d(:, :, :, :, nblk) + + ! Call subroutine + CALL Hydro_computeFluxesHll_X_gpu_oacc( & + dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + U_d_p, & + hydro_op1_auxc_d_p, & + hydro_op1_flX_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_X_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_Y_gpu_oacc ( & + nblk, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + U_d, & + hydro_op1_auxc_d, & + hydro_op1_flY_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_Y_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_Y_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: U_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flY_d(:, :, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: U_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + real, pointer :: hydro_op1_flY_d_p(:, :, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + U_d_p => U_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + hydro_op1_flY_d_p => hydro_op1_flY_d(:, :, :, :, nblk) + + ! 
Call subroutine + CALL Hydro_computeFluxesHll_Y_gpu_oacc( & + dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + U_d_p, & + hydro_op1_auxc_d_p, & + hydro_op1_flY_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_Y_gpu_oacc + + subroutine wrapper_Hydro_updateSolutionHll_gpu_oacc ( & + nblk, & + tile_lo_d, & + tile_hi_d, & + hydro_op1_flX_d, & + hydro_op1_flY_d, & + hydro_op1_flZ_d, & + U_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_updateSolutionHll_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_updateSolutionHll_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(INOUT) :: hydro_op1_flX_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flY_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flZ_d(:, :, :, :, :) + real, target, intent(INOUT) :: U_d(:, :, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: hydro_op1_flX_d_p(:, :, :, :) + real, pointer :: hydro_op1_flY_d_p(:, :, :, :) + real, pointer :: hydro_op1_flZ_d_p(:, :, :, :) + real, pointer :: U_d_p(:, :, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + hydro_op1_flX_d_p => hydro_op1_flX_d(:, :, :, :, nblk) + hydro_op1_flY_d_p => hydro_op1_flY_d(:, :, :, :, nblk) + hydro_op1_flZ_d_p => hydro_op1_flZ_d(:, :, :, :, nblk) + U_d_p => U_d(:, :, :, :, nblk) + + ! 
Call subroutine + CALL Hydro_updateSolutionHll_gpu_oacc( & + tile_lo_d_p, & + tile_hi_d_p, & + hydro_op1_flX_d_p, & + hydro_op1_flY_d_p, & + hydro_op1_flZ_d_p, & + U_d_p & + ) + + end subroutine wrapper_Hydro_updateSolutionHll_gpu_oacc + end module gpu_tf_hydro_mod diff --git a/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydro_3D.F90 b/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydro_3D.F90 index bc55bf19..0f948550 100644 --- a/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydro_3D.F90 +++ b/tools/milhoja_pypkg/src/milhoja/tests/data/FlashX/REF_gpu_tf_hydro_3D.F90 @@ -20,17 +20,17 @@ end subroutine gpu_tf_hydro_Cpp2C contains - subroutine gpu_tf_hydro_Fortran( & - C_packet_h, & - dataQ_h, & - queue2_h, & - queue3_h, & - nTiles_d, & + subroutine gpu_tf_hydro_Fortran( & + C_packet_h, & + dataQ_h, & + queue2_h, & + queue3_h, & + nTiles_d, & external_hydro_op1_dt_d, & - tile_deltas_d, & - tile_hi_d, & - tile_lo_d, & - CC_1_d, & + tile_deltas_d, & + tile_hi_d, & + tile_lo_d, & + CC_1_d, & scratch_hydro_op1_auxC_d, & scratch_hydro_op1_flX_d, & scratch_hydro_op1_flY_d, & @@ -54,19 +54,19 @@ subroutine gpu_tf_hydro_Fortran( & use dr_cg_hydroAdvance_mod, ONLY : Hydro_updateSolutionHll_gpu_oacc #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeSoundSpeedHll_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeSoundSpeedHll_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_X_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeFluxesHll_X_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_Y_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeFluxesHll_Y_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_Z_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeFluxesHll_Z_gpu_oacc) vector #endif #ifndef 
SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_updateSolutionHll_gpu_oacc) vector + !$acc routine (wrapper_Hydro_updateSolutionHll_gpu_oacc) vector #endif implicit none @@ -118,14 +118,15 @@ subroutine gpu_tf_hydro_Fortran( & !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_computeSoundSpeedHll_gpu_oacc( & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - CC_1_d(:, :, :, :, n), & - lbdd_CC_1_d(:, n), & - scratch_hydro_op1_auxC_d(:, :, :, n), & - lbdd_scratch_hydro_op1_auxC_d(:, n) & - ) + CALL wrapper_Hydro_computeSoundSpeedHll_gpu_oacc( & + n, & + tile_lo_d, & + tile_hi_d, & + CC_1_d, & + lbdd_CC_1_d, & + scratch_hydro_op1_auxC_d, & + lbdd_scratch_hydro_op1_auxC_d & + ) end do !$acc end parallel loop @@ -134,17 +135,18 @@ subroutine gpu_tf_hydro_Fortran( & !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_computeFluxesHll_X_gpu_oacc( & - external_hydro_op1_dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - CC_1_d(:, :, :, :, n), & - lbdd_CC_1_d(:, n), & - scratch_hydro_op1_auxC_d(:, :, :, n), & - lbdd_scratch_hydro_op1_auxC_d(:, n), & - scratch_hydro_op1_flX_d(:, :, :, :, n), & - lbdd_scratch_hydro_op1_flX_d(:, n) & + CALL wrapper_Hydro_computeFluxesHll_X_gpu_oacc( & + n, & + external_hydro_op1_dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + lbdd_CC_1_d, & + scratch_hydro_op1_auxC_d, & + lbdd_scratch_hydro_op1_auxC_d, & + scratch_hydro_op1_flX_d, & + lbdd_scratch_hydro_op1_flX_d & ) end do !$acc end parallel loop @@ -152,64 +154,67 @@ subroutine gpu_tf_hydro_Fortran( & !$acc parallel loop gang default(none) & !$acc& async(queue2_h) do n = 1, nTiles_d - CALL Hydro_computeFluxesHll_Y_gpu_oacc( & - external_hydro_op1_dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - CC_1_d(:, :, :, :, n), & - lbdd_CC_1_d(:, n), & - scratch_hydro_op1_auxC_d(:, :, :, n), & - lbdd_scratch_hydro_op1_auxC_d(:, n), & - 
scratch_hydro_op1_flY_d(:, :, :, :, n), & - lbdd_scratch_hydro_op1_flY_d(:, n) & - ) + CALL wrapper_Hydro_computeFluxesHll_Y_gpu_oacc( & + n, & + external_hydro_op1_dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + lbdd_CC_1_d, & + scratch_hydro_op1_auxC_d, & + lbdd_scratch_hydro_op1_auxC_d, & + scratch_hydro_op1_flY_d, & + lbdd_scratch_hydro_op1_flY_d & + ) end do !$acc end parallel loop !$acc parallel loop gang default(none) & !$acc& async(queue3_h) do n = 1, nTiles_d - CALL Hydro_computeFluxesHll_Z_gpu_oacc( & + CALL wrapper_Hydro_computeFluxesHll_Z_gpu_oacc( & + n, & external_hydro_op1_dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - CC_1_d(:, :, :, :, n), & - lbdd_CC_1_d(:, n), & - scratch_hydro_op1_auxC_d(:, :, :, n), & - lbdd_scratch_hydro_op1_auxC_d(:, n), & - scratch_hydro_op1_flZ_d(:, :, :, :, n), & - lbdd_scratch_hydro_op1_flZ_d(:, n) & - ) + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + lbdd_CC_1_d, & + scratch_hydro_op1_auxC_d, & + lbdd_scratch_hydro_op1_auxC_d, & + scratch_hydro_op1_flZ_d, & + lbdd_scratch_hydro_op1_flZ_d & + ) end do !$acc end parallel loop - !$acc wait( & - !$acc& queue2_h, & - !$acc& queue3_h & - !$acc& ) + !$acc wait( & + !$acc& queue2_h, & + !$acc& queue3_h & + !$acc& ) !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_updateSolutionHll_gpu_oacc( & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - scratch_hydro_op1_flX_d(:, :, :, :, n), & - scratch_hydro_op1_flY_d(:, :, :, :, n), & - scratch_hydro_op1_flZ_d(:, :, :, :, n), & - lbdd_scratch_hydro_op1_flX_d(:, n), & - CC_1_d(:, :, :, :, n), & - lbdd_CC_1_d(:, n) & - ) + CALL wrapper_Hydro_updateSolutionHll_gpu_oacc( & + n, & + tile_lo_d, & + tile_hi_d, & + scratch_hydro_op1_flX_d, & + scratch_hydro_op1_flY_d, & + scratch_hydro_op1_flZ_d, & + lbdd_scratch_hydro_op1_flX_d, & + CC_1_d, & + lbdd_CC_1_d & + ) end do !$acc end parallel loop !$acc wait( & - !$acc& dataQ_h & - !$acc& ) - 
+ !$acc& dataQ_h & + !$acc& ) + MH_idx = INT(2, kind=MILHOJA_INT) MH_ierr = release_gpu_tf_hydro_extra_queue_c(C_packet_h, MH_idx) if (MH_ierr /= MILHOJA_SUCCESS) then @@ -227,5 +232,339 @@ subroutine gpu_tf_hydro_Fortran( & !$acc end data end subroutine gpu_tf_hydro_Fortran + subroutine wrapper_Hydro_computeSoundSpeedHll_gpu_oacc ( & + nblk, & + tile_lo_d, & + tile_hi_d, & + CC_1_d, & + lbdd_CC_1_d, & + scratch_hydro_op1_auxC_d, & + lbdd_scratch_hydro_op1_auxC_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeSoundSpeedHll_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeSoundSpeedHll_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + integer, target, intent(IN) :: lbdd_CC_1_d(:, :) + real, target, intent(INOUT) :: scratch_hydro_op1_auxC_d(:, :, :, :) + integer, target, intent(IN) :: lbdd_scratch_hydro_op1_auxC_d(:, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + integer, pointer :: lbdd_CC_1_d_p(:) + real, pointer :: scratch_hydro_op1_auxC_d_p(:, :, :) + integer, pointer :: lbdd_scratch_hydro_op1_auxC_d_p(:) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + lbdd_CC_1_d_p => lbdd_CC_1_d(:, nblk) + scratch_hydro_op1_auxC_d_p => scratch_hydro_op1_auxC_d(:, :, :, nblk) + lbdd_scratch_hydro_op1_auxC_d_p => lbdd_scratch_hydro_op1_auxC_d(:, nblk) + + ! 
Call subroutine + CALL Hydro_computeSoundSpeedHll_gpu_oacc( & + tile_lo_d_p, & + tile_hi_d_p, & + CC_1_d_p, & + lbdd_CC_1_d_p, & + scratch_hydro_op1_auxC_d_p, & + lbdd_scratch_hydro_op1_auxC_d_p & + ) + + end subroutine wrapper_Hydro_computeSoundSpeedHll_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_X_gpu_oacc ( & + nblk, & + external_hydro_op1_dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + lbdd_CC_1_d, & + scratch_hydro_op1_auxC_d, & + lbdd_scratch_hydro_op1_auxC_d, & + scratch_hydro_op1_flX_d, & + lbdd_scratch_hydro_op1_flX_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_X_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_X_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: external_hydro_op1_dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + integer, target, intent(IN) :: lbdd_CC_1_d(:, :) + real, target, intent(INOUT) :: scratch_hydro_op1_auxC_d(:, :, :, :) + integer, target, intent(IN) :: lbdd_scratch_hydro_op1_auxC_d(:, :) + real, target, intent(INOUT) :: scratch_hydro_op1_flX_d(:, :, :, :, :) + integer, target, intent(IN) :: lbdd_scratch_hydro_op1_flX_d(:, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + integer, pointer :: lbdd_CC_1_d_p(:) + real, pointer :: scratch_hydro_op1_auxC_d_p(:, :, :) + integer, pointer :: lbdd_scratch_hydro_op1_auxC_d_p(:) + real, pointer :: scratch_hydro_op1_flX_d_p(:, :, :, :) + integer, pointer :: lbdd_scratch_hydro_op1_flX_d_p(:) + + ! 
Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + lbdd_CC_1_d_p => lbdd_CC_1_d(:, nblk) + scratch_hydro_op1_auxC_d_p => scratch_hydro_op1_auxC_d(:, :, :, nblk) + lbdd_scratch_hydro_op1_auxC_d_p => lbdd_scratch_hydro_op1_auxC_d(:, nblk) + scratch_hydro_op1_flX_d_p => scratch_hydro_op1_flX_d(:, :, :, :, nblk) + lbdd_scratch_hydro_op1_flX_d_p => lbdd_scratch_hydro_op1_flX_d(:, nblk) + + ! Call subroutine + CALL Hydro_computeFluxesHll_X_gpu_oacc( & + external_hydro_op1_dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + CC_1_d_p, & + lbdd_CC_1_d_p, & + scratch_hydro_op1_auxC_d_p, & + lbdd_scratch_hydro_op1_auxC_d_p, & + scratch_hydro_op1_flX_d_p, & + lbdd_scratch_hydro_op1_flX_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_X_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_Y_gpu_oacc ( & + nblk, & + external_hydro_op1_dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + lbdd_CC_1_d, & + scratch_hydro_op1_auxC_d, & + lbdd_scratch_hydro_op1_auxC_d, & + scratch_hydro_op1_flY_d, & + lbdd_scratch_hydro_op1_flY_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_Y_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_Y_gpu_oacc) vector + + implicit none + + ! 
Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: external_hydro_op1_dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + integer, target, intent(IN) :: lbdd_CC_1_d(:, :) + real, target, intent(INOUT) :: scratch_hydro_op1_auxC_d(:, :, :, :) + integer, target, intent(IN) :: lbdd_scratch_hydro_op1_auxC_d(:, :) + real, target, intent(INOUT) :: scratch_hydro_op1_flY_d(:, :, :, :, :) + integer, target, intent(IN) :: lbdd_scratch_hydro_op1_flY_d(:, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + integer, pointer :: lbdd_CC_1_d_p(:) + real, pointer :: scratch_hydro_op1_auxC_d_p(:, :, :) + integer, pointer :: lbdd_scratch_hydro_op1_auxC_d_p(:) + real, pointer :: scratch_hydro_op1_flY_d_p(:, :, :, :) + integer, pointer :: lbdd_scratch_hydro_op1_flY_d_p(:) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + lbdd_CC_1_d_p => lbdd_CC_1_d(:, nblk) + scratch_hydro_op1_auxC_d_p => scratch_hydro_op1_auxC_d(:, :, :, nblk) + lbdd_scratch_hydro_op1_auxC_d_p => lbdd_scratch_hydro_op1_auxC_d(:, nblk) + scratch_hydro_op1_flY_d_p => scratch_hydro_op1_flY_d(:, :, :, :, nblk) + lbdd_scratch_hydro_op1_flY_d_p => lbdd_scratch_hydro_op1_flY_d(:, nblk) + + ! 
Call subroutine + CALL Hydro_computeFluxesHll_Y_gpu_oacc( & + external_hydro_op1_dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + CC_1_d_p, & + lbdd_CC_1_d_p, & + scratch_hydro_op1_auxC_d_p, & + lbdd_scratch_hydro_op1_auxC_d_p, & + scratch_hydro_op1_flY_d_p, & + lbdd_scratch_hydro_op1_flY_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_Y_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_Z_gpu_oacc ( & + nblk, & + external_hydro_op1_dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + lbdd_CC_1_d, & + scratch_hydro_op1_auxC_d, & + lbdd_scratch_hydro_op1_auxC_d, & + scratch_hydro_op1_flZ_d, & + lbdd_scratch_hydro_op1_flZ_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_Z_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_Z_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: external_hydro_op1_dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + integer, target, intent(IN) :: lbdd_CC_1_d(:, :) + real, target, intent(INOUT) :: scratch_hydro_op1_auxC_d(:, :, :, :) + integer, target, intent(IN) :: lbdd_scratch_hydro_op1_auxC_d(:, :) + real, target, intent(INOUT) :: scratch_hydro_op1_flZ_d(:, :, :, :, :) + integer, target, intent(IN) :: lbdd_scratch_hydro_op1_flZ_d(:, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + integer, pointer :: lbdd_CC_1_d_p(:) + real, pointer :: scratch_hydro_op1_auxC_d_p(:, :, :) + integer, pointer :: lbdd_scratch_hydro_op1_auxC_d_p(:) + real, pointer :: scratch_hydro_op1_flZ_d_p(:, :, :, :) + integer, pointer :: lbdd_scratch_hydro_op1_flZ_d_p(:) + + ! 
Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + lbdd_CC_1_d_p => lbdd_CC_1_d(:, nblk) + scratch_hydro_op1_auxC_d_p => scratch_hydro_op1_auxC_d(:, :, :, nblk) + lbdd_scratch_hydro_op1_auxC_d_p => lbdd_scratch_hydro_op1_auxC_d(:, nblk) + scratch_hydro_op1_flZ_d_p => scratch_hydro_op1_flZ_d(:, :, :, :, nblk) + lbdd_scratch_hydro_op1_flZ_d_p => lbdd_scratch_hydro_op1_flZ_d(:, nblk) + + ! Call subroutine + CALL Hydro_computeFluxesHll_Z_gpu_oacc( & + external_hydro_op1_dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + CC_1_d_p, & + lbdd_CC_1_d_p, & + scratch_hydro_op1_auxC_d_p, & + lbdd_scratch_hydro_op1_auxC_d_p, & + scratch_hydro_op1_flZ_d_p, & + lbdd_scratch_hydro_op1_flZ_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_Z_gpu_oacc + + subroutine wrapper_Hydro_updateSolutionHll_gpu_oacc ( & + nblk, & + tile_lo_d, & + tile_hi_d, & + scratch_hydro_op1_flX_d, & + scratch_hydro_op1_flY_d, & + scratch_hydro_op1_flZ_d, & + lbdd_scratch_hydro_op1_flX_d, & + CC_1_d, & + lbdd_CC_1_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_updateSolutionHll_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_updateSolutionHll_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(INOUT) :: scratch_hydro_op1_flX_d(:, :, :, :, :) + real, target, intent(INOUT) :: scratch_hydro_op1_flY_d(:, :, :, :, :) + real, target, intent(INOUT) :: scratch_hydro_op1_flZ_d(:, :, :, :, :) + integer, target, intent(IN) :: lbdd_scratch_hydro_op1_flX_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + integer, target, intent(IN) :: lbdd_CC_1_d(:, :) + + ! 
Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: scratch_hydro_op1_flX_d_p(:, :, :, :) + real, pointer :: scratch_hydro_op1_flY_d_p(:, :, :, :) + real, pointer :: scratch_hydro_op1_flZ_d_p(:, :, :, :) + integer, pointer :: lbdd_scratch_hydro_op1_flX_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + integer, pointer :: lbdd_CC_1_d_p(:) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + scratch_hydro_op1_flX_d_p => scratch_hydro_op1_flX_d(:, :, :, :, nblk) + scratch_hydro_op1_flY_d_p => scratch_hydro_op1_flY_d(:, :, :, :, nblk) + scratch_hydro_op1_flZ_d_p => scratch_hydro_op1_flZ_d(:, :, :, :, nblk) + lbdd_scratch_hydro_op1_flX_d_p => lbdd_scratch_hydro_op1_flX_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + lbdd_CC_1_d_p => lbdd_CC_1_d(:, nblk) + + ! Call subroutine + CALL Hydro_updateSolutionHll_gpu_oacc( & + tile_lo_d_p, & + tile_hi_d_p, & + scratch_hydro_op1_flX_d_p, & + scratch_hydro_op1_flY_d_p, & + scratch_hydro_op1_flZ_d_p, & + lbdd_scratch_hydro_op1_flX_d_p, & + CC_1_d_p, & + lbdd_CC_1_d_p & + ) + + end subroutine wrapper_Hydro_updateSolutionHll_gpu_oacc + end module gpu_tf_hydro_mod diff --git a/tools/milhoja_pypkg/src/milhoja/tests/data/Sedov/REF_gpu_tf_test2_3D.F90 b/tools/milhoja_pypkg/src/milhoja/tests/data/Sedov/REF_gpu_tf_test2_3D.F90 index 6d78c7a7..e248030a 100644 --- a/tools/milhoja_pypkg/src/milhoja/tests/data/Sedov/REF_gpu_tf_test2_3D.F90 +++ b/tools/milhoja_pypkg/src/milhoja/tests/data/Sedov/REF_gpu_tf_test2_3D.F90 @@ -48,16 +48,16 @@ subroutine gpu_tf_test2_Fortran( & use dr_cg_hydroAdvance_mod, ONLY : Hydro_computeFluxesHll_Z_gpu_oacc #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeSoundSpeedHll_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeSoundSpeedHll_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_X_gpu_oacc) vector + !$acc routine 
(wrapper_Hydro_computeFluxesHll_X_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_Y_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeFluxesHll_Y_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_Z_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeFluxesHll_Z_gpu_oacc) vector #endif implicit none @@ -99,12 +99,13 @@ subroutine gpu_tf_test2_Fortran( & !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_computeSoundSpeedHll_gpu_oacc( & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - CC_1_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n) & - ) + CALL wrapper_Hydro_computeSoundSpeedHll_gpu_oacc( & + n, & + tile_lo_d, & + tile_hi_d, & + CC_1_d, & + hydro_op1_auxc_d & + ) end do !$acc end parallel loop @@ -113,45 +114,48 @@ subroutine gpu_tf_test2_Fortran( & !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_computeFluxesHll_X_gpu_oacc( & + CALL wrapper_Hydro_computeFluxesHll_X_gpu_oacc( & + n, & dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - CC_1_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n), & - hydro_op1_flX_d(:, :, :, :, n) & - ) + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flX_d & + ) end do !$acc end parallel loop !$acc parallel loop gang default(none) & !$acc& async(queue2_h) do n = 1, nTiles_d - CALL Hydro_computeFluxesHll_Y_gpu_oacc( & + CALL wrapper_Hydro_computeFluxesHll_Y_gpu_oacc( & + n, & dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - CC_1_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n), & - hydro_op1_flY_d(:, :, :, :, n) & - ) + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flY_d & + ) end do !$acc end parallel loop !$acc parallel loop gang default(none) & !$acc& async(queue3_h) do n 
= 1, nTiles_d - CALL Hydro_computeFluxesHll_Z_gpu_oacc( & + CALL wrapper_Hydro_computeFluxesHll_Z_gpu_oacc( & + n, & dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - CC_1_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n), & - hydro_op1_flZ_d(:, :, :, :, n) & - ) + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flZ_d & + ) end do !$acc end parallel loop @@ -178,5 +182,220 @@ subroutine gpu_tf_test2_Fortran( & !$acc end data end subroutine gpu_tf_test2_Fortran + subroutine wrapper_Hydro_computeSoundSpeedHll_gpu_oacc ( & + nblk, & + tile_lo_d, & + tile_hi_d, & + CC_1_d, & + hydro_op1_auxc_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeSoundSpeedHll_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeSoundSpeedHll_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + + ! 
Call subroutine + CALL Hydro_computeSoundSpeedHll_gpu_oacc( & + tile_lo_d_p, & + tile_hi_d_p, & + CC_1_d_p, & + hydro_op1_auxc_d_p & + ) + + end subroutine wrapper_Hydro_computeSoundSpeedHll_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_X_gpu_oacc ( & + nblk, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flX_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_X_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_X_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flX_d(:, :, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + real, pointer :: hydro_op1_flX_d_p(:, :, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + hydro_op1_flX_d_p => hydro_op1_flX_d(:, :, :, :, nblk) + + ! 
Call subroutine + CALL Hydro_computeFluxesHll_X_gpu_oacc( & + dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + CC_1_d_p, & + hydro_op1_auxc_d_p, & + hydro_op1_flX_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_X_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_Y_gpu_oacc ( & + nblk, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flY_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_Y_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_Y_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flY_d(:, :, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + real, pointer :: hydro_op1_flY_d_p(:, :, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + hydro_op1_flY_d_p => hydro_op1_flY_d(:, :, :, :, nblk) + + ! 
Call subroutine + CALL Hydro_computeFluxesHll_Y_gpu_oacc( & + dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + CC_1_d_p, & + hydro_op1_auxc_d_p, & + hydro_op1_flY_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_Y_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_Z_gpu_oacc ( & + nblk, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flZ_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_Z_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_Z_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flZ_d(:, :, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + real, pointer :: hydro_op1_flZ_d_p(:, :, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + hydro_op1_flZ_d_p => hydro_op1_flZ_d(:, :, :, :, nblk) + + ! 
Call subroutine + CALL Hydro_computeFluxesHll_Z_gpu_oacc( & + dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + CC_1_d_p, & + hydro_op1_auxc_d_p, & + hydro_op1_flZ_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_Z_gpu_oacc + end module gpu_tf_test2_mod diff --git a/tools/milhoja_pypkg/src/milhoja/tests/data/Sedov/REF_gpu_tf_test_3D.F90 b/tools/milhoja_pypkg/src/milhoja/tests/data/Sedov/REF_gpu_tf_test_3D.F90 index ea8881b3..0cca5fb9 100644 --- a/tools/milhoja_pypkg/src/milhoja/tests/data/Sedov/REF_gpu_tf_test_3D.F90 +++ b/tools/milhoja_pypkg/src/milhoja/tests/data/Sedov/REF_gpu_tf_test_3D.F90 @@ -48,16 +48,16 @@ subroutine gpu_tf_test_Fortran( & use dr_cg_hydroAdvance_mod, ONLY : Hydro_updateSolutionHll_gpu_oacc #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_X_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeFluxesHll_X_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_Y_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeFluxesHll_Y_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_computeFluxesHll_Z_gpu_oacc) vector + !$acc routine (wrapper_Hydro_computeFluxesHll_Z_gpu_oacc) vector #endif #ifndef SUPPRESS_ACC_ROUTINE_FOR_METH_IN_APP - !$acc routine (Hydro_updateSolutionHll_gpu_oacc) vector + !$acc routine (wrapper_Hydro_updateSolutionHll_gpu_oacc) vector #endif implicit none @@ -101,45 +101,48 @@ subroutine gpu_tf_test_Fortran( & !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_computeFluxesHll_X_gpu_oacc( & + CALL wrapper_Hydro_computeFluxesHll_X_gpu_oacc( & + n, & dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - CC_1_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n), & - hydro_op1_flX_d(:, :, :, :, n) & - ) + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flX_d & + ) end 
do !$acc end parallel loop !$acc parallel loop gang default(none) & !$acc& async(queue2_h) do n = 1, nTiles_d - CALL Hydro_computeFluxesHll_Y_gpu_oacc( & + CALL wrapper_Hydro_computeFluxesHll_Y_gpu_oacc( & + n, & dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - CC_1_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n), & - hydro_op1_flY_d(:, :, :, :, n) & - ) + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flY_d & + ) end do !$acc end parallel loop !$acc parallel loop gang default(none) & !$acc& async(queue3_h) do n = 1, nTiles_d - CALL Hydro_computeFluxesHll_Z_gpu_oacc( & + CALL wrapper_Hydro_computeFluxesHll_Z_gpu_oacc( & + n, & dt_d, & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - tile_deltas_d(:, n), & - CC_1_d(:, :, :, :, n), & - hydro_op1_auxc_d(:, :, :, n), & - hydro_op1_flZ_d(:, :, :, :, n) & - ) + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flZ_d & + ) end do !$acc end parallel loop @@ -151,14 +154,15 @@ subroutine gpu_tf_test_Fortran( & !$acc parallel loop gang default(none) & !$acc& async(dataQ_h) do n = 1, nTiles_d - CALL Hydro_updateSolutionHll_gpu_oacc( & - tile_lo_d(:, n), & - tile_hi_d(:, n), & - hydro_op1_flX_d(:, :, :, :, n), & - hydro_op1_flY_d(:, :, :, :, n), & - hydro_op1_flZ_d(:, :, :, :, n), & - CC_1_d(:, :, :, :, n) & - ) + CALL wrapper_Hydro_updateSolutionHll_gpu_oacc( & + n, & + tile_lo_d, & + tile_hi_d, & + hydro_op1_flX_d, & + hydro_op1_flY_d, & + hydro_op1_flZ_d, & + CC_1_d & + ) end do !$acc end parallel loop @@ -183,5 +187,230 @@ subroutine gpu_tf_test_Fortran( & !$acc end data end subroutine gpu_tf_test_Fortran + subroutine wrapper_Hydro_computeFluxesHll_X_gpu_oacc ( & + nblk, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flX_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_X_gpu_oacc + + !$acc routine vector + !$acc routine 
(Hydro_computeFluxesHll_X_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flX_d(:, :, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + real, pointer :: hydro_op1_flX_d_p(:, :, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + hydro_op1_flX_d_p => hydro_op1_flX_d(:, :, :, :, nblk) + + ! Call subroutine + CALL Hydro_computeFluxesHll_X_gpu_oacc( & + dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + CC_1_d_p, & + hydro_op1_auxc_d_p, & + hydro_op1_flX_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_X_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_Y_gpu_oacc ( & + nblk, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flY_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_Y_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_Y_gpu_oacc) vector + + implicit none + + ! 
Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flY_d(:, :, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + real, pointer :: hydro_op1_flY_d_p(:, :, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + hydro_op1_flY_d_p => hydro_op1_flY_d(:, :, :, :, nblk) + + ! Call subroutine + CALL Hydro_computeFluxesHll_Y_gpu_oacc( & + dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + CC_1_d_p, & + hydro_op1_auxc_d_p, & + hydro_op1_flY_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_Y_gpu_oacc + + subroutine wrapper_Hydro_computeFluxesHll_Z_gpu_oacc ( & + nblk, & + dt_d, & + tile_lo_d, & + tile_hi_d, & + tile_deltas_d, & + CC_1_d, & + hydro_op1_auxc_d, & + hydro_op1_flZ_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_computeFluxesHll_Z_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_computeFluxesHll_Z_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + real, target, intent(IN) :: dt_d + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(IN) :: tile_deltas_d(:, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_auxc_d(:, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flZ_d(:, :, :, :, :) + + ! 
Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: tile_deltas_d_p(:) + real, pointer :: CC_1_d_p(:, :, :, :) + real, pointer :: hydro_op1_auxc_d_p(:, :, :) + real, pointer :: hydro_op1_flZ_d_p(:, :, :, :) + + ! Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + tile_deltas_d_p => tile_deltas_d(:, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + hydro_op1_auxc_d_p => hydro_op1_auxc_d(:, :, :, nblk) + hydro_op1_flZ_d_p => hydro_op1_flZ_d(:, :, :, :, nblk) + + ! Call subroutine + CALL Hydro_computeFluxesHll_Z_gpu_oacc( & + dt_d, & + tile_lo_d_p, & + tile_hi_d_p, & + tile_deltas_d_p, & + CC_1_d_p, & + hydro_op1_auxc_d_p, & + hydro_op1_flZ_d_p & + ) + + end subroutine wrapper_Hydro_computeFluxesHll_Z_gpu_oacc + + subroutine wrapper_Hydro_updateSolutionHll_gpu_oacc ( & + nblk, & + tile_lo_d, & + tile_hi_d, & + hydro_op1_flX_d, & + hydro_op1_flY_d, & + hydro_op1_flZ_d, & + CC_1_d & + ) + + use dr_cg_hydroAdvance_mod, ONLY: Hydro_updateSolutionHll_gpu_oacc + + !$acc routine vector + !$acc routine (Hydro_updateSolutionHll_gpu_oacc) vector + + implicit none + + ! Arguments + integer, intent(IN) :: nblk + integer, target, intent(IN) :: tile_lo_d(:, :) + integer, target, intent(IN) :: tile_hi_d(:, :) + real, target, intent(INOUT) :: hydro_op1_flX_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flY_d(:, :, :, :, :) + real, target, intent(INOUT) :: hydro_op1_flZ_d(:, :, :, :, :) + real, target, intent(INOUT) :: CC_1_d(:, :, :, :, :) + + ! Local variables + integer, pointer :: tile_lo_d_p(:) + integer, pointer :: tile_hi_d_p(:) + real, pointer :: hydro_op1_flX_d_p(:, :, :, :) + real, pointer :: hydro_op1_flY_d_p(:, :, :, :) + real, pointer :: hydro_op1_flZ_d_p(:, :, :, :) + real, pointer :: CC_1_d_p(:, :, :, :) + + ! 
Attach pointers + tile_lo_d_p => tile_lo_d(:, nblk) + tile_hi_d_p => tile_hi_d(:, nblk) + hydro_op1_flX_d_p => hydro_op1_flX_d(:, :, :, :, nblk) + hydro_op1_flY_d_p => hydro_op1_flY_d(:, :, :, :, nblk) + hydro_op1_flZ_d_p => hydro_op1_flZ_d(:, :, :, :, nblk) + CC_1_d_p => CC_1_d(:, :, :, :, nblk) + + ! Call subroutine + CALL Hydro_updateSolutionHll_gpu_oacc( & + tile_lo_d_p, & + tile_hi_d_p, & + hydro_op1_flX_d_p, & + hydro_op1_flY_d_p, & + hydro_op1_flZ_d_p, & + CC_1_d_p & + ) + + end subroutine wrapper_Hydro_updateSolutionHll_gpu_oacc + end module gpu_tf_test_mod