diff --git a/.gitmodules b/.gitmodules index e69de29bb..c9a26c699 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "cholla-tests-data"] + path = cholla-tests-data + url = https://github.com/cholla-hydro/cholla-tests-data.git diff --git a/Makefile b/Makefile index dd1915537..fa2bc4500 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ CUDA_ARCH ?= sm_70 DIRS := src src/analysis src/chemistry_gpu src/cooling src/cooling_grackle src/cosmology \ src/cpu src/global src/gravity src/gravity/paris src/grid src/hydro \ - src/integrators src/io src/main.cpp src/main_tests.cpp \ + src/integrators src/io src/main.cpp src/main_tests.cpp src/mhd\ src/model src/mpi src/old_cholla src/particles src/reconstruction \ src/riemann_solvers src/system_tests src/utils src/dust @@ -134,6 +134,7 @@ ifdef HIPCONFIG DFLAGS += -DO_HIP CXXFLAGS += $(HIPCONFIG) GPUCXX ?= hipcc + #GPUFLAGS += -Wall LD := $(CXX) LDFLAGS := $(CXXFLAGS) -L$(ROCM_PATH)/lib LIBS += -lamdhip64 diff --git a/builds/make.host.c3po b/builds/make.host.c3po index 79574201c..02f658896 100644 --- a/builds/make.host.c3po +++ b/builds/make.host.c3po @@ -3,13 +3,9 @@ CC = mpicc CXX = mpicxx CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++11 ${F_OFFLOAD} -CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++11 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -g -O0 -std=c++11 -ccbin=mpicxx -GPUFLAGS_OPTIMIZE = -g -O3 -std=c++11 -ccbin=mpicxx CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx +GPUFLAGS_DEBUG = -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx -Xcompiler -rdynamic GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 -ccbin=mpicxx OMP_NUM_THREADS = 7 diff --git a/builds/make.host.frontier b/builds/make.host.frontier index c225b3655..69f715871 100644 --- a/builds/make.host.frontier +++ b/builds/make.host.frontier @@ -11,8 +11,8 @@ CFLAGS_OPTIMIZE = -g -O2 CXXFLAGS_DEBUG = -g -O0 -std=c++17 
CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 -Wno-unused-result -GPUFLAGS_OPTIMIZE = -std=c++17 --offload-arch=gfx90a -Wno-unused-result -GPUFLAGS_DEBUG = -g -O0 -std=c++17 --offload-arch=gfx90a -Wno-unused-result +GPUFLAGS_OPTIMIZE = -std=c++17 --offload-arch=gfx90a -Wall -Wno-unused-result +GPUFLAGS_DEBUG = -g -O0 -std=c++17 --offload-arch=gfx90a -Wall -Wno-unused-result HIPCONFIG = -I$(ROCM_PATH)/include $(shell hipconfig -C) # workaround for Rocm 5.2 warnings #HIPCONFIG = $(shell hipconfig -C) diff --git a/builds/make.host.spock b/builds/make.host.spock index b839e95ae..8cac7c086 100644 --- a/builds/make.host.spock +++ b/builds/make.host.spock @@ -6,6 +6,7 @@ CXX = CC CFLAGS_DEBUG = -g -O0 CFLAGS_OPTIMIZE = -g -O2 + CXXFLAGS_DEBUG = -g -O0 -std=c++17 CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 diff --git a/builds/make.type.mhd b/builds/make.type.mhd index 3f67ea88f..486ba2547 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -9,15 +9,14 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMP +# Currently MHD only supports PCM reconstruction. Higher order reconstruction +# methods will be added later +DFLAGS += -DPCM DFLAGS += -DHLLD DFLAGS += -DMHD -ifeq ($(findstring cosmology,$(TYPE)),cosmology) -DFLAGS += -DSIMPLE -else +# MHD only supports the Van Leer integrator DFLAGS += -DVL -endif # need this if using Disk_3D # DFLAGS += -DDISK_ICS @@ -45,3 +44,15 @@ DFLAGS += $(OUTPUT) #If not specified, MPI_GPU is off by default #This is set in the system make.host file DFLAGS += $(MPI_GPU) + +# NOTE: The following macros are to help facilitate debugging and should not be +# used on scientific runs + +# Do CUDA error checking +DFLAGS += -DCUDA_ERROR_CHECK + +# Limit the number of steps to evolve. 
+# DFLAGS += -DN_STEPS_LIMIT=1000 + +# Output on every time step +# DFLAGS += -DOUTPUT_ALWAYS diff --git a/builds/run_tests.sh b/builds/run_tests.sh index 519c9d928..80fcab2a1 100755 --- a/builds/run_tests.sh +++ b/builds/run_tests.sh @@ -110,7 +110,7 @@ buildCholla () { echo -e "\nBuilding Cholla...\n" builtin cd $CHOLLA_ROOT - make -j TYPE=${CHOLLA_MAKE_TYPE} BUILD=${1} + make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} BUILD=${1} } # ============================================================================== @@ -121,7 +121,7 @@ buildChollaTests () { echo builtin cd $CHOLLA_ROOT - make -j TYPE=${CHOLLA_MAKE_TYPE} TEST=true + make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} TEST=true } # ============================================================================== diff --git a/cholla-tests-data b/cholla-tests-data index 66d592821..c069bb7a6 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 66d5928213b495c2fef61b0653b90a25ae3aa7cf +Subproject commit c069bb7a6de79546f60d3ea47f6c10ba19df3c76 diff --git a/examples/1D/sound_wave.txt b/examples/1D/sound_wave.txt index c6555c662..97b7c92b1 100644 --- a/examples/1D/sound_wave.txt +++ b/examples/1D/sound_wave.txt @@ -30,18 +30,17 @@ outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x direction +# velocity in the x direction vx=0 # velocity in the y direction vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-4 # value of gamma gamma=1.666666666666667 - diff --git a/examples/2D/sound_wave.txt b/examples/2D/sound_wave.txt index d69b1270f..58608bac2 100644 --- a/examples/2D/sound_wave.txt +++ b/examples/2D/sound_wave.txt @@ -33,18 +33,17 @@ outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x 
direction +# velocity in the x direction vx=0 # velocity in the y direction vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-4 # value of gamma gamma=1.666666666666667 - diff --git a/examples/3D/Brio_and_Wu.txt b/examples/3D/Brio_and_Wu.txt index a742ae207..fba126396 100644 --- a/examples/3D/Brio_and_Wu.txt +++ b/examples/3D/Brio_and_Wu.txt @@ -6,11 +6,11 @@ ################################################ # number of grid cells in the x dimension -nx=32 +nx=256 # number of grid cells in the y dimension -ny=32 +ny=256 # number of grid cells in the z dimension -nz=32 +nz=256 # final output time tout=0.1 # time interval for output diff --git a/examples/3D/Ryu_and_Jones_1a.txt b/examples/3D/Ryu_and_Jones_1a.txt new file mode 100644 index 000000000..168fcdffa --- /dev/null +++ b/examples/3D/Ryu_and_Jones_1a.txt @@ -0,0 +1,74 @@ +# +# Parameter File for 3D Ryu & Jones MHD shock tube 4d. +# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: +# Algorithms and Tests for One-Dimensional Flow" +# +# Note: There are many shock tubes in this paper. 
This settings file is +# specifically for shock tube 1a +# + +################################################ +# number of grid cells in the x dimension +nx=32 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=32 +# final output time +tout=0.08 +# time interval for output +outstep=0.08 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=10.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=20.0 +# Magnetic field of the left state +Bx_l=1.4104739588693909 +By_l=1.4104739588693909 +Bz_l=0.0 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=-10.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=1.0 +# Magnetic field of the right state +Bx_r=1.4104739588693909 +By_r=1.4104739588693909 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.6666666666666667 diff --git a/examples/3D/Ryu_and_Jones_2a.txt b/examples/3D/Ryu_and_Jones_2a.txt new file mode 100644 index 000000000..f886b4de3 --- /dev/null +++ b/examples/3D/Ryu_and_Jones_2a.txt @@ -0,0 +1,74 @@ +# +# Parameter File for 3D Ryu & Jones MHD shock tube 2a. +# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: +# Algorithms and Tests for One-Dimensional Flow" +# +# Note: There are many shock tubes in this paper. 
This settings file is +# specifically for shock tube 2a +# + +################################################ +# number of grid cells in the x dimension +nx=32 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=32 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.08 +# velocity of left state +vx_l=1.2 +vy_l=0.01 +vz_l=0.5 +# pressure of left state +P_l=0.95 +# Magnetic field of the left state +Bx_l=7.0898154036220635 +By_l=1.0155412503859613 +Bz_l=0.5641895835477563 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=1.0 +# Magnetic field of the right state +Bx_r=7.0898154036220635 +By_r=1.1283791670955126 +Bz_r=0.5641895835477563 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.6666666666666667 diff --git a/examples/3D/alfven_wave.txt b/examples/3D/alfven_wave.txt new file mode 100644 index 000000000..bfacbc968 --- /dev/null +++ b/examples/3D/alfven_wave.txt @@ -0,0 +1,71 @@ +# +# Parameter File for MHD Alfven Wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave +# The right eigenvector for this wave is: +# (1/3) * [0, 0, +/-1, -/+2*sqrt(2), 0, -1, 2*sqrt(2), 0] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid 
cells in the z dimension +nz=256 +# final output time +tout=1.0 +# time interval for output +outstep=1.0 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=0 +rEigenVec_MomentumX=0 +rEigenVec_MomentumY=0 +rEigenVec_MomentumZ=-1 +rEigenVec_Bx=0 +rEigenVec_By=0 +rEigenVec_Bz=1 +rEigenVec_E=0 diff --git a/examples/3D/constant.txt b/examples/3D/constant.txt index ca3b411e2..871fbb7b7 100644 --- a/examples/3D/constant.txt +++ b/examples/3D/constant.txt @@ -42,9 +42,9 @@ vz=0 # pressure P=1.380658e-5 # Magnetic Field -Bx=0.0 -By=0.0 -Bz=0.0 +Bx=1.0e-5 +By=2.0e-5 +Bz=3.0e-5 # value of gamma gamma=1.666666667 diff --git a/examples/3D/fast_magnetosonic.txt b/examples/3D/fast_magnetosonic.txt new file mode 100644 index 000000000..bc134a79a --- /dev/null +++ b/examples/3D/fast_magnetosonic.txt @@ -0,0 +1,71 @@ +# +# Parameter File for MHD fast magnetosonic wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave. 
+# The right eigenvector for this wave is: +# (1/(6*sqrt(5))) * [6, +/-12, -/+4*sqrt(2), -/+2, 0, 8*sqrt(2), 4, 27] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=0.5 +# time interval for output +outstep=0.5 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=0.4472135954999579 +rEigenVec_MomentumX=0.8944271909999159 +rEigenVec_MomentumY=-0.4472135954999579 +rEigenVec_MomentumZ=0.0 +rEigenVec_Bx=0.0 +rEigenVec_By=0.8944271909999159 +rEigenVec_Bz=0.0 +rEigenVec_E=2.0124611797498106 diff --git a/examples/3D/mhd_contact_wave.txt b/examples/3D/mhd_contact_wave.txt new file mode 100644 index 000000000..9250bba5a --- /dev/null +++ b/examples/3D/mhd_contact_wave.txt @@ -0,0 +1,71 @@ +# +# Parameter File for MHD contact wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave +# The right eigenvector for this wave is: +# (1/2) * [2, +/-2, 0, 0, 0, 0, 0, 1] +# The terms 
with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=1.0 +# time interval for output +outstep=1.0 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=1 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-1 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=1.0 +rEigenVec_MomentumX=1.0 +rEigenVec_MomentumY=0.0 +rEigenVec_MomentumZ=0.0 +rEigenVec_Bx=0.0 +rEigenVec_By=0.0 +rEigenVec_Bz=0.0 +rEigenVec_E=0.5 diff --git a/examples/3D/slow_magnetosonic.txt b/examples/3D/slow_magnetosonic.txt new file mode 100644 index 000000000..960952b5f --- /dev/null +++ b/examples/3D/slow_magnetosonic.txt @@ -0,0 +1,72 @@ +# +# Parameter File for MHD slow magnetosonic wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave +# The right eigenvector for this wave is: +# (1/(6*sqrt(5))) * [12, +/-6, +/-8*sqrt(2), +/-4, 0, -4*sqrt(2), -2, 9] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ 
+# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=2.0 +# time interval for output +outstep=2.0 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=0.8944271909999159 +rEigenVec_MomentumX=0.4472135954999579 +rEigenVec_MomentumY=0.8944271909999159 +rEigenVec_MomentumZ=0.0 +rEigenVec_Bx=0.0 +rEigenVec_By=-0.4472135954999579 +rEigenVec_Bz=0.0 +rEigenVec_E=0.6708203932499369 + diff --git a/examples/3D/sound_wave.txt b/examples/3D/sound_wave.txt index 0f3866226..6c226c0ab 100644 --- a/examples/3D/sound_wave.txt +++ b/examples/3D/sound_wave.txt @@ -34,18 +34,17 @@ outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x direction +# velocity in the x direction vx=0 # velocity in the y direction vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-4 # value of gamma gamma=1.666666666666667 - diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py index 0c6d4b3ad..1a823e268 100755 --- 
a/python_scripts/cat_dset_3D.py +++ b/python_scripts/cat_dset_3D.py @@ -11,11 +11,10 @@ iend = 1*n_proc dnamein = './hdf5/raw/' dnameout = './hdf5/' -DE = 0 # loop over outputs for n in range(ns, ne+1): - + # loop over files for a given output for i in range(istart, iend): @@ -26,7 +25,7 @@ # read in the header data from the input file head = filein.attrs - # if it's the first input file, write the header attributes + # if it's the first input file, write the header attributes # and create the datasets in the output file if (i == 0): nx = head['dims'][0] @@ -47,8 +46,17 @@ my = fileout.create_dataset("momentum_y", (nx, ny, nz), chunks=True) mz = fileout.create_dataset("momentum_z", (nx, ny, nz), chunks=True) E = fileout.create_dataset("Energy", (nx, ny, nz), chunks=True) - if (DE): + try: GE = fileout.create_dataset("GasEnergy", (nx, ny, nz), chunks=True) + except KeyError: + print('No Dual energy data present'); + try: + [nx_mag, ny_mag, nz_mag] = head['magnetic_field_dims'] + bx = fileout.create_dataset("magnetic_x", (nx_mag, ny_mag, nz_mag), chunks=True) + by = fileout.create_dataset("magnetic_y", (nx_mag, ny_mag, nz_mag), chunks=True) + bz = fileout.create_dataset("magnetic_z", (nx_mag, ny_mag, nz_mag), chunks=True) + except KeyError: + print('No magnetic field data present'); # write data from individual processor file to # correct location in concatenated file @@ -63,9 +71,18 @@ fileout['momentum_y'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_y'] fileout['momentum_z'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_z'] fileout['Energy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['Energy'] - if (DE): + try: fileout['GasEnergy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['GasEnergy'] - + except KeyError: + print('No Dual energy data present'); + try: + [nxl_mag, nyl_mag, nzl_mag] = head['magnetic_field_dims_local'] + fileout['magnetic_x'][xs:xs+nxl_mag,ys:ys+nyl_mag,zs:zs+nzl_mag] = filein['magnetic_x'] + 
fileout['magnetic_y'][xs:xs+nxl_mag,ys:ys+nyl_mag,zs:zs+nzl_mag] = filein['magnetic_y'] + fileout['magnetic_z'][xs:xs+nxl_mag,ys:ys+nyl_mag,zs:zs+nzl_mag] = filein['magnetic_z'] + except KeyError: + print('No magnetic field data present'); + filein.close() fileout.close() diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index 524b58cd0..50356c3c5 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -111,7 +111,7 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g } // McKinnon et al. (2017) -__device__ Real calc_tau_sp(Real n, Real T) { +__device__ __host__ Real calc_tau_sp(Real n, Real T) { Real YR_IN_S = 3.154e7; Real a1 = 1; // dust grain size in units of 0.1 micrometers Real d0 = n / (6e-4); // gas density in units of 10^-27 g/cm^3 @@ -125,7 +125,7 @@ __device__ Real calc_tau_sp(Real n, Real T) { } // McKinnon et al. (2017) -__device__ Real calc_dd_dt(Real d_dust, Real tau_sp) { +__device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp) { return -d_dust / (tau_sp/3); } diff --git a/src/global/global.cpp b/src/global/global.cpp index 1f6a5cbfa..ab384144b 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -329,6 +329,26 @@ void parse_param(char *name,char *value, struct parameters *parms){ parms->Bz_r = atof(value); else if (strcmp(name, "diaph")==0) parms->diaph = atof(value); + else if (strcmp(name, "rEigenVec_rho")==0) + parms->rEigenVec_rho = atof(value); + else if (strcmp(name, "rEigenVec_MomentumX")==0) + parms->rEigenVec_MomentumX = atof(value); + else if (strcmp(name, "rEigenVec_MomentumY")==0) + parms->rEigenVec_MomentumY = atof(value); + else if (strcmp(name, "rEigenVec_MomentumZ")==0) + parms->rEigenVec_MomentumZ = atof(value); + else if (strcmp(name, "rEigenVec_E")==0) + parms->rEigenVec_E = atof(value); + else if (strcmp(name, "rEigenVec_Bx")==0) + parms->rEigenVec_Bx = atof(value); + else if (strcmp(name, "rEigenVec_By")==0) + parms->rEigenVec_By = atof(value); + else if 
(strcmp(name, "rEigenVec_Bz")==0) + parms->rEigenVec_Bz = atof(value); + else if (strcmp(name, "pitch")==0) + parms->pitch = atof(value); + else if (strcmp(name, "yaw")==0) + parms->yaw = atof(value); #ifdef PARTICLES else if (strcmp(name, "prng_seed")==0) parms->prng_seed = atoi(value); diff --git a/src/global/global.h b/src/global/global.h index 79d3dbc88..50f0acf03 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -42,7 +42,7 @@ typedef double Real; #define MSUN_CGS 1.98847e33; //Msun in gr #define KPC_CGS 3.086e21; //kpc in cm #define KM_CGS 1e5; //km in cm -#define MH 1.67262171e-24 //Mass of hydrogen [g] +#define MH 1.67262171e-24 //Mass of hydrogen [g] #define TIME_UNIT 3.15569e10 // 1 kyr in s #define LENGTH_UNIT 3.08567758e21 // 1 kpc in cm @@ -235,9 +235,9 @@ struct parameters Real vz; Real P; Real A; - Real Bx; - Real By; - Real Bz; + Real Bx=0; + Real By=0; + Real Bz=0; Real rho_l; Real vx_l; Real vy_l=0; @@ -255,6 +255,16 @@ struct parameters Real By_r; Real Bz_r; Real diaph; + Real rEigenVec_rho = 0; + Real rEigenVec_MomentumX = 0; + Real rEigenVec_MomentumY = 0; + Real rEigenVec_MomentumZ = 0; + Real rEigenVec_E = 0; + Real rEigenVec_Bx = 0; + Real rEigenVec_By = 0; + Real rEigenVec_Bz = 0; + Real pitch = 0; + Real yaw = 0; #ifdef PARTICLES // The random seed for particle simulations. With the default of 0 then a // machine dependent seed will be generated. 
@@ -294,7 +304,7 @@ struct parameters int bc_potential_type; #if defined(COOLING_GRACKLE) || defined (CHEMISTRY_GPU) char UVB_rates_file[MAXLEN]; //File for the UVB photoheating and photoionization rates of HI, HeI and HeII -#endif +#endif #ifdef ANALYSIS char analysis_scale_outputs_file[MAXLEN]; //File for the scale_factor output values for cosmological simulations {{}} char analysisdir[MAXLEN]; diff --git a/src/global/global_cuda.cu b/src/global/global_cuda.cu index bd2e235c1..2153b1615 100644 --- a/src/global/global_cuda.cu +++ b/src/global/global_cuda.cu @@ -9,16 +9,12 @@ bool memory_allocated; Real *dev_conserved, *dev_conserved_half; Real *Q_Lx, *Q_Rx, *Q_Ly, *Q_Ry, *Q_Lz, *Q_Rz, *F_x, *F_y, *F_z; +Real *ctElectricFields; Real *eta_x, *eta_y, *eta_z, *etah_x, *etah_y, *etah_z; -Real *dev_dti; //Arrays for potential in GPU: Will be set to NULL if not using GRAVITY Real *dev_grav_potential; Real *temp_potential; Real *buffer_potential; -// Arrays for calc_dt -Real *host_dti_array; -Real *dev_dti_array; - #endif //CUDA diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 9150647c0..61cbc0752 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -25,14 +25,12 @@ extern bool memory_allocated; // Flag becomes true after allocating the memory o // conserved variables extern Real *dev_conserved, *dev_conserved_half; // input states and associated interface fluxes (Q* and F* from Stone, 2008) +// Note that for hydro the size of these arrays is n_fields*n_cells*sizeof(Real) +// while for MHD it is (n_fields-1)*n_cells*sizeof(Real), i.e. 
they has one +// fewer field than you would expect extern Real *Q_Lx, *Q_Rx, *Q_Ly, *Q_Ry, *Q_Lz, *Q_Rz, *F_x, *F_y, *F_z; - -// Scalar for storing device side hydro/MHD time steps -extern Real *dev_dti; - -// array of inverse timesteps for dt calculation (brought back by Alwin May 24 2022) -extern Real *host_dti_array; -extern Real *dev_dti_array; +// Constrained transport electric fields +extern Real *ctElectricFields; //Arrays for potential in GPU: Will be set to NULL if not using GRAVITY extern Real *dev_grav_potential; diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index aab040ede..afe0e0a42 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -38,14 +38,14 @@ void Grid3D::Set_Boundary_Conditions_Grid( parameters P){ #ifdef GRAVITY #ifdef CPU_TIME Timer.Pot_Boundaries.Start(); - #endif + #endif //CPU_TIME Grav.TRANSFER_POTENTIAL_BOUNDARIES = true; Set_Boundary_Conditions(P); Grav.TRANSFER_POTENTIAL_BOUNDARIES = false; #ifdef CPU_TIME Timer.Pot_Boundaries.End(); - #endif - #endif + #endif //CPU_TIME + #endif //GRAVITY } /*! 
\fn void Set_Boundary_Conditions(parameters P) diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu index f69ac3c59..8b689601e 100644 --- a/src/grid/cuda_boundaries.cu +++ b/src/grid/cuda_boundaries.cu @@ -3,9 +3,9 @@ #include "../global/global_cuda.h" #include "cuda_boundaries.h" -__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a, int &idMag); +__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a); -__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[],int nx, int ny, int nz, int n_ghost, int &magneticIdx); +__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[],int nx, int ny, int nz, int n_ghost); __global__ void PackBuffers3DKernel(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int buffer_ncells, int n_fields, int n_cells) { @@ -66,7 +66,7 @@ __global__ void SetGhostCellsKernel(Real * c_head, int f0, int f1, int f2, int f3, int f4, int f5, int isize, int jsize, int ksize, int imin, int jmin, int kmin, int dir){ - int id,i,j,k,gidx,idx,ii, magneticIdx; + int id,i,j,k,gidx,idx,ii; Real a[3] = {1.,1.,1.}; int flags[6] = {f0,f1,f2,f3,f4,f5}; @@ -93,17 +93,11 @@ __global__ void SetGhostCellsKernel(Real * c_head, gidx = i + j*nx + k*nx*ny; // calculate idx (index of real cell) and a[:] for reflection - idx = SetBoundaryMapping(i,j,k,&a[0],flags,nx,ny,nz,n_ghost,magneticIdx); + idx = SetBoundaryMapping(i,j,k,&a[0],flags,nx,ny,nz,n_ghost); if (idx>=0){ for (ii=0; ii1) { // set index on -x face if (ig < n_ghost) { - ir = FindIndex(ig, nx, flags[0], 0, n_ghost, &a[0], irMag); + ir = FindIndex(ig, nx, flags[0], 0, n_ghost, &a[0]); } // set index on +x face else if (ig >= nx-n_ghost) { - ir = FindIndex(ig, nx, flags[1], 1, n_ghost, &a[0], irMag); + ir = FindIndex(ig, nx, flags[1], 1, n_ghost, &a[0]); } // set i index for multi-D problems else { ir = ig; - #ifdef MHD - irMag = ig; - #endif //MHD } // 
if custom x boundaries are needed, set index to -1 and return if (ir < 0) { - #ifdef MHD - magneticIdx = -1; - #endif //MHD return idx = -1; } // otherwise add i index to ghost cell mapping idx += ir; - #ifdef MHD - magneticIdx += irMag; - #endif //MHD } @@ -201,33 +185,24 @@ __device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], // set index on -y face if (jg < n_ghost) { - jr = FindIndex(jg, ny, flags[2], 0, n_ghost, &a[1], jrMag); + jr = FindIndex(jg, ny, flags[2], 0, n_ghost, &a[1]); } // set index on +y face else if (jg >= ny-n_ghost) { - jr = FindIndex(jg, ny, flags[3], 1, n_ghost, &a[1], jrMag); + jr = FindIndex(jg, ny, flags[3], 1, n_ghost, &a[1]); } // set j index for multi-D problems else { jr = jg; - #ifdef MHD - jrMag = jg; - #endif //MHD } // if custom y boundaries are needed, set index to -1 and return if (jr < 0) { - #ifdef MHD - magneticIdx = -1; - #endif //MHD return idx = -1; } // otherwise add j index to ghost cell mapping idx += nx*jr; - #ifdef MHD - magneticIdx += nx*jrMag; - #endif //MHD } @@ -236,38 +211,29 @@ __device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], // set index on -z face if (kg < n_ghost) { - kr = FindIndex(kg, nz, flags[4], 0, n_ghost, &a[2], krMag); + kr = FindIndex(kg, nz, flags[4], 0, n_ghost, &a[2]); } // set index on +z face else if (kg >= nz-n_ghost) { - kr = FindIndex(kg, nz, flags[5], 1, n_ghost, &a[2], krMag); + kr = FindIndex(kg, nz, flags[5], 1, n_ghost, &a[2]); } // set k index for multi-D problems else { kr = kg; - #ifdef MHD - krMag = kg; - #endif //MHD } // if custom z boundaries are needed, set index to -1 and return if (kr < 0) { - #ifdef MHD - magneticIdx = -1; - #endif //MHD return idx = -1; } // otherwise add k index to ghost cell mapping idx += nx*ny*kr; - #ifdef MHD - magneticIdx += nx*ny*krMag; - #endif //MHD } return idx; } -__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a, int &idMag){ +__device__ int FindIndex(int ig, 
int nx, int flag, int face, int n_ghost, Real *a){ int id; // lower face @@ -278,45 +244,27 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real * // periodic case 1: id = ig+nx-2*n_ghost; - #ifdef MHD - idMag = id; - #endif //MHD break; // reflective case 2: id = 2*n_ghost-ig-1; *(a) = -1.0; - #ifdef MHD - idMag = id - 1; - #endif //MHD break; // transmissive case 3: id = n_ghost; - #ifdef MHD - idMag = id - 1; - #endif //MHD break; // custom case 4: id = -1; - #ifdef MHD - idMag = -1; - #endif //MHD break; // MPI case 5: id = ig; - #ifdef MHD - idMag = id; - #endif //MHD break; // default is periodic default: id = ig+nx-2*n_ghost; - #ifdef MHD - idMag = id; - #endif //MHD } } // upper face @@ -349,9 +297,6 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real * default: id = ig-nx+2*n_ghost; } - #ifdef MHD - idMag = id; - #endif //MHD } return id; } diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index e100def1c..8025f3744 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -43,7 +43,6 @@ #include "../dust/dust_cuda.h" // provides Dust_Update #endif - /*! \fn Grid3D(void) * \brief Constructor for the Grid. */ Grid3D::Grid3D(void) @@ -72,6 +71,15 @@ Grid3D::Grid3D(void) H.n_ghost_potential_offset = H.n_ghost - N_GHOST_POTENTIAL; #endif + #ifdef MHD + // Set the number of ghost cells high enough for MHD + if (H.n_ghost < 3) + { + chprintf("Insufficient number of ghost cells for MHD. H.n_ghost was %i, setting to 3.\n", H.n_ghost); + H.n_ghost = 3; + } + #endif //MHD + } /*! 
\fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos) @@ -139,7 +147,7 @@ void Grid3D::Initialize(struct parameters *P) // Set the CFL coefficient (a global variable) C_cfl = 0.3; - + #ifdef AVERAGE_SLOW_CELLS H.min_dt_slow = 1e-100; //Initialize the minumum dt to a tiny number #endif // AVERAGE_SLOW_CELLS @@ -286,9 +294,9 @@ void Grid3D::AllocateMemory(void) #endif #endif //SCALAR #ifdef MHD - C.magnetic_x = &(C.host[(5 + NSCALARS)*H.n_cells]); - C.magnetic_y = &(C.host[(6 + NSCALARS)*H.n_cells]); - C.magnetic_z = &(C.host[(7 + NSCALARS)*H.n_cells]); + C.magnetic_x = &(C.host[(grid_enum::magnetic_x)*H.n_cells]); + C.magnetic_y = &(C.host[(grid_enum::magnetic_y)*H.n_cells]); + C.magnetic_z = &(C.host[(grid_enum::magnetic_z)*H.n_cells]); #endif //MHD #ifdef DE C.GasEnergy = &(C.host[(H.n_fields-1)*H.n_cells]); @@ -296,6 +304,7 @@ void Grid3D::AllocateMemory(void) // allocate memory for the conserved variable arrays on the device CudaSafeCall( cudaMalloc((void**)&C.device, H.n_fields*H.n_cells*sizeof(Real)) ); + cuda_utilities::initGpuMemory(C.device, H.n_fields*H.n_cells*sizeof(Real)); C.d_density = C.device; C.d_momentum_x = &(C.device[H.n_cells]); C.d_momentum_y = &(C.device[2*H.n_cells]); @@ -308,22 +317,14 @@ void Grid3D::AllocateMemory(void) #endif #endif // SCALAR #ifdef MHD - C.d_magnetic_x = &(C.device[(5 + NSCALARS)*H.n_cells]); - C.d_magnetic_y = &(C.device[(6 + NSCALARS)*H.n_cells]); - C.d_magnetic_z = &(C.device[(7 + NSCALARS)*H.n_cells]); + C.d_magnetic_x = &(C.device[(grid_enum::magnetic_x)*H.n_cells]); + C.d_magnetic_y = &(C.device[(grid_enum::magnetic_y)*H.n_cells]); + C.d_magnetic_z = &(C.device[(grid_enum::magnetic_z)*H.n_cells]); #endif //MHD #ifdef DE C.d_GasEnergy = &(C.device[(H.n_fields-1)*H.n_cells]); #endif // DE - - // arrays that hold the max_dti calculation for hydro for each thread block (pre reduction) - int ngrid = (H.n_cells + TPB - 1) / TPB; - CudaSafeCall( cudaHostAlloc(&host_dti_array, 
ngrid*sizeof(Real), cudaHostAllocDefault) ); - CudaSafeCall( cudaMalloc((void**)&dev_dti_array, ngrid*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&dev_dti, sizeof(Real)) ); - - #if defined( GRAVITY ) CudaSafeCall( cudaHostAlloc(&C.Grav_potential, H.n_cells*sizeof(Real), cudaHostAllocDefault) ); CudaSafeCall( cudaMalloc((void**)&C.d_Grav_potential, H.n_cells*sizeof(Real)) ); @@ -451,7 +452,7 @@ Real Grid3D::Update_Grid(void) #ifdef VL VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields); #endif //VL - #ifdef SIMPLE + #ifdef SIMPLE Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields); #endif //SIMPLE #endif //CUDA @@ -493,7 +494,7 @@ Real Grid3D::Update_Grid(void) Timer.Chemistry.RecordTime( Chem.H.runtime_chemistry_step ); #endif #endif - + #ifdef AVERAGE_SLOW_CELLS //Set the min_delta_t for averaging a slow cell Real max_dti_slow; @@ -540,7 +541,7 @@ Real Grid3D::Update_Hydro_Grid( ){ #ifdef ONLY_PARTICLES // Don't integrate the Hydro when only solving for particles return 1e-10; - #endif + #endif //ONLY_PARTICLES Real dti; @@ -551,26 +552,26 @@ Real Grid3D::Update_Hydro_Grid( ){ #ifdef GRAVITY // Extrapolate gravitational potential for hydro step Extrapolate_Grav_Potential(); - #endif + #endif //GRAVITY dti = Update_Grid(); #ifdef CPU_TIME #ifdef CHEMISTRY_GPU Timer.Hydro.Subtract(Chem.H.runtime_chemistry_step); - //Subtract the time spent on the Chemical Update - #endif + //Subtract the time spent on the Chemical Update + #endif //CHEMISTRY_GPU Timer.Hydro.End(); #endif //CPU_TIME #ifdef COOLING_GRACKLE #ifdef CPU_TIME Timer.Cooling.Start(); - #endif + #endif //CPU_TIME Do_Cooling_Step_Grackle( ); #ifdef CPU_TIME Timer.Cooling.End(); - #endif + #endif //CPU_TIME #endif//COOLING_GRACKLE @@ -623,11 +624,6 @@ void Grid3D::FreeMemory(void) // free the conserved variable arrays CudaSafeCall( cudaFreeHost(C.host) ); - // free the 
timestep arrays - CudaSafeCall( cudaFreeHost(host_dti_array) ); - cudaFree(dev_dti_array); - cudaFree(dev_dti); - #ifdef GRAVITY CudaSafeCall( cudaFreeHost(C.Grav_potential) ); CudaSafeCall( cudaFree(C.d_Grav_potential) ); diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index f121bd423..d05b90214 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -245,7 +245,6 @@ struct Header Real sphere_center_y; Real sphere_center_z; - #ifdef GRAVITY /*! \var n_ghost_potential_offset * \brief Number of offset betewen hydro_ghost_cells and potential_ghost_cells */ @@ -407,7 +406,7 @@ class Grid3D /*! pointer to conserved variable on device */ Real *device; Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, - *d_Energy, *d_scalar, *d_basic_scalar, + *d_Energy, *d_scalar, *d_basic_scalar, *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, *d_GasEnergy; @@ -450,26 +449,6 @@ class Grid3D void set_dt_Gravity(); #endif - /*! \fn Real calc_dti_CPU_1D() - * \brief Calculate the maximum inverse timestep on 1D, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_1D(); - - /*! \fn Real calc_dti_CPU_2D() - * \brief Calculate the maximum inverse timestep on 2D, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_2D(); - - /*! \fn Real calc_dti_CPU_3D_function() - * \brief Calculate the maximum inverse timestep on 3D using openMP, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_3D_function( int g_start, int g_end ); - - /*! \fn Real calc_dti_CPU_3D() - * \brief Calculate the maximum inverse timestep on 3D, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_3D(); - - /*! \fn Real calc_dti_CPU() - * \brief Calculate the maximum inverse timestep, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU(); - /*! \fn void Update_Grid(void) * \brief Update the conserved quantities in each cell. */ Real Update_Grid(void); @@ -479,8 +458,7 @@ class Grid3D Real Update_Hydro_Grid(void); void Update_Time(); - - /*! 
\fn void Write_Header_Text(FILE *fp) + /*! \fn void Write_Header_Text(FILE *fp) * \brief Write the relevant header info to a text output file. */ void Write_Header_Text(FILE *fp); @@ -553,6 +531,41 @@ class Grid3D * \brief Sine wave perturbation. */ void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); + /*! + * \brief Initialize the grid with a simple linear wave. + * + * \param[in] rho The background density + * \param[in] vx The background velocity in the X-direction + * \param[in] vy The background velocity in the Y-direction + * \param[in] vz The background velocity in the Z-direction + * \param[in] P The background pressure + * \param[in] A The amplitude of the wave + * \param[in] Bx The background magnetic field in the X-direction + * \param[in] By The background magnetic field in the Y-direction + * \param[in] Bz The background magnetic field in the Z-direction + * \param[in] rEigenVec_rho The right eigenvector component for the density + * \param[in] rEigenVec_MomentumX The right eigenvector component for the velocity + * in the X-direction + * \param[in] rEigenVec_MomentumY The right eigenvector component for the velocity + * in the Y-direction + * \param[in] rEigenVec_MomentumZ The right eigenvector component for the velocity + * in the Z-direction + * \param[in] rEigenVec_E The right eigenvector component for the energy + * \param[in] rEigenVec_Bx The right eigenvector component for the magnetic + * field in the X-direction + * \param[in] rEigenVec_By The right eigenvector component for the magnetic + * field in the Y-direction + * \param[in] rEigenVec_Bz The right eigenvector component for the magnetic + * field in the Z-direction + * \param[in] pitch The pitch angle of the linear wave + * \param[in] yaw The yaw angle of the linear wave + */ + void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, + Real Bx, Real By, Real Bz, Real rEigenVec_rho, + Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, Real 
rEigenVec_MomentumZ, + Real rEigenVec_E, Real rEigenVec_Bx, Real rEigenVec_By, + Real rEigenVec_Bz, Real pitch, Real yaw); + /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Square wave density perturbation with amplitude A*rho in pressure equilibrium. */ void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); @@ -655,7 +668,7 @@ class Grid3D void Spherical_Overdensity_3D(); void Clouds(); - + void Uniform_Grid(); void Zeldovich_Pancake( struct parameters P ); diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h index 6f25676dd..b96f0f4ca 100644 --- a/src/grid/grid_enum.h +++ b/src/grid/grid_enum.h @@ -2,8 +2,8 @@ // An enum which holds offsets for grid quantities // In the final form of this approach, this file will also set nfields (not yet) and NSCALARS (done) -// so that adding a field only requires registering it here: -// grid knows to allocate memory based on nfields and NSCALARS +// so that adding a field only requires registering it here: +// grid knows to allocate memory based on nfields and NSCALARS // and values can be accessed with density[id + ncells*grid_enum::enum_name] // example: C.device[id + H.n_cells*grid_enum::basic_scalar] @@ -55,7 +55,7 @@ enum : int { #endif // SCALAR finalscalar_plus_1, // needed to calculate NSCALARS - finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct + finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct // so that anything after starts with scalar + NSCALARS #ifdef MHD @@ -68,8 +68,36 @@ enum : int { #endif num_fields, -//Aliases and manually computed enums + //Aliases and manually computed enums nscalars = finalscalar_plus_1 - scalar, + + #ifdef MHD + num_flux_fields = num_fields-1, + num_interface_fields = num_fields-1, + #else + num_flux_fields = num_fields, + num_interface_fields = num_fields, + #endif //MHD + + #ifdef MHD + magnetic_start = magnetic_x, + 
magnetic_end = magnetic_z, + + // Note that the direction of the flux, the suffix _? indicates the direction of the electric field, not the magnetic flux + fluxX_magnetic_z = magnetic_start, + fluxX_magnetic_y = magnetic_start+1, + fluxY_magnetic_x = magnetic_start, + fluxY_magnetic_z = magnetic_start+1, + fluxZ_magnetic_y = magnetic_start, + fluxZ_magnetic_x = magnetic_start+1, + + Q_x_magnetic_y = magnetic_start, + Q_x_magnetic_z = magnetic_start+1, + Q_y_magnetic_z = magnetic_start, + Q_y_magnetic_x = magnetic_start+1, + Q_z_magnetic_x = magnetic_start, + Q_z_magnetic_y = magnetic_start+1 + #endif // MHD }; } diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 4786fb446..8c8c5b3f9 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1,5 +1,5 @@ /*! \file initial_conditions.cpp -/* \brief Definitions of initial conditions for different tests. + * \brief Definitions of initial conditions for different tests. Note that the grid is mapped to 1D as i + (x_dim)*j + (x_dim*y_dim)*k. Functions are members of the Grid3D class. 
*/ @@ -13,10 +13,13 @@ #include "../mpi/mpi_routines.h" #include "../io/io.h" #include "../utils/error_handling.h" +#include "../utils/mhd_utilities.h" +#include "../utils/math_utilities.h" #include #include #include #include +#include using namespace std; @@ -31,6 +34,10 @@ void Grid3D::Set_Initial_Conditions(parameters P) { Constant(P.rho, P.vx, P.vy, P.vz, P.P, P.Bx, P.By, P.Bz); } else if (strcmp(P.init, "Sound_Wave")==0) { Sound_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); + } else if (strcmp(P.init, "Linear_Wave")==0) { + Linear_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A, P.Bx, P.By, P.Bz, + P.rEigenVec_rho, P.rEigenVec_MomentumX, P.rEigenVec_MomentumY, P.rEigenVec_MomentumZ, + P.rEigenVec_E, P.rEigenVec_Bx, P.rEigenVec_By, P.rEigenVec_Bz, P.pitch, P.yaw); } else if (strcmp(P.init, "Square_Wave")==0) { Square_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); } else if (strcmp(P.init, "Riemann")==0) { @@ -62,9 +69,9 @@ void Grid3D::Set_Initial_Conditions(parameters P) { } else if (strcmp(P.init, "Spherical_Overpressure_3D")==0) { Spherical_Overpressure_3D(); } else if (strcmp(P.init, "Spherical_Overdensity_3D")==0) { - Spherical_Overdensity_3D(); + Spherical_Overdensity_3D(); } else if (strcmp(P.init, "Clouds")==0) { - Clouds(); + Clouds(); } else if (strcmp(P.init, "Read_Grid")==0) { #ifndef ONLY_PARTICLES Read_Grid(P); @@ -116,8 +123,8 @@ void Grid3D::Set_Domain_Properties(struct parameters P) H.zblocal = H.zbound; H.xblocal_max = H.xblocal + P.xlen; - H.yblocal_max = H.yblocal + P.ylen; - H.zblocal_max = H.zblocal + P.zlen; + H.yblocal_max = H.yblocal + P.ylen; + H.zblocal_max = H.zblocal + P.zlen; #else Real nx_param = (Real) nx_global; @@ -237,7 +244,6 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real } - /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Sine wave perturbation. 
*/ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) @@ -291,6 +297,61 @@ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) #ifdef DE C.GasEnergy[id] = P/(gama-1.0); #endif //DE + #ifdef DE + C.GasEnergy[id] = P/(gama-1.0); + #endif //DE + } + } + } + +} + +/*! \fn void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) + * \brief Sine wave perturbation. */ +void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, + Real Bx, Real By, Real Bz, Real rEigenVec_rho, + Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, Real rEigenVec_MomentumZ, + Real rEigenVec_E, Real rEigenVec_Bx, Real rEigenVec_By, + Real rEigenVec_Bz, Real pitch, Real yaw) +{ + auto [stagger, junk1, junk2] = math_utils::rotateCoords(H.dx/2, H.dy/2, H.dz/2, pitch, yaw); + + // set initial values of conserved variables + for(int k=H.n_ghost; k(i, j, k, pitch, yaw); + + //get cell index + int id = i + j*H.nx + k*H.nx*H.ny; + + // get cell-centered position + Real x_pos, y_pos, z_pos; + Get_Position(i_rot, j_rot, k_rot, &x_pos, &y_pos, &z_pos); + + // set constant initial states. 
Note that mhd::utils::computeEnergy + // computes the hydro energy if MHD is turned off + Real sine_wave = std::sin(2.0 * PI * x_pos); + + C.density[id] = rho; + C.momentum_x[id] = rho*vx; + C.momentum_y[id] = rho*vy; + C.momentum_z[id] = rho*vz; + C.Energy[id] = mhd::utils::computeEnergy(P, rho, vx, vy, vz, Bx, By, Bz, gama); + // add small-amplitude perturbations + C.density[id] += A * rEigenVec_rho * sine_wave; + C.momentum_x[id] += A * rEigenVec_MomentumX * sine_wave; + C.momentum_y[id] += A * rEigenVec_MomentumY * sine_wave; + C.momentum_z[id] += A * rEigenVec_MomentumZ * sine_wave; + C.Energy[id] += A * rEigenVec_E * sine_wave; + + #ifdef MHD + sine_wave = std::sin(2.0 * PI * (x_pos+stagger)); + C.magnetic_x[id] = Bx + A * rEigenVec_Bx * sine_wave; + C.magnetic_y[id] = By + A * rEigenVec_By * sine_wave; + C.magnetic_z[id] = Bz + A * rEigenVec_Bz * sine_wave; + #endif //MHD } } } @@ -405,26 +466,6 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real kend = H.nz; } - #ifdef MHD - auto setMagnetFields = [&] () - { - Real x_pos_face = x_pos + 0.5 * H.dx; - - if (x_pos_face < diaph) - { - C.magnetic_x[id] = Bx_l; - C.magnetic_y[id] = By_l; - C.magnetic_z[id] = Bz_l; - } - else - { - C.magnetic_x[id] = Bx_r; - C.magnetic_y[id] = By_r; - C.magnetic_z[id] = Bz_r; - } - }; - #endif // MHD - // set initial values of conserved variables for(k=kstart-1; k #include #include +#include #include "../utils/gpu.hpp" #include "../global/global.h" @@ -14,6 +15,7 @@ #include "../utils/hydro_utilities.h" #include "../utils/cuda_utilities.h" #include "../utils/reduction_utilities.h" +#include "../utils/DeviceVector.h" __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields) @@ -425,8 +427,8 @@ __device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &gamma) { // Compute the gas pressure and fast magnetosonic speed - 
Real gasP = mhdUtils::computeGasPressure(E, d, vx*d, vy*d, vz*d, avgBx, avgBy, avgBz, gamma); - Real cf = mhdUtils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); + Real gasP = mhd::utils::computeGasPressure(E, d, vx*d, vy*d, vz*d, avgBx, avgBy, avgBz, gamma); + Real cf = mhd::utils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); // Find maximum inverse crossing time in the cell (i.e. minimum crossing time) Real cellMaxInverseDt = fmax((fabs(vx)+cf)/dx, (fabs(vy)+cf)/dy); @@ -470,10 +472,8 @@ __global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, int n } } - // do the block wide reduction (find the max inverse timestep in the block) - // then write it to that block's location in the dev_dti array - max_dti = reduction_utilities::blockReduceMax(max_dti); - if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti; + // do the grid wide reduction (find the max inverse timestep in the grid) + reduction_utilities::gridReduceMax(max_dti, dev_dti); } @@ -514,10 +514,8 @@ __global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, int n } } - // do the block wide reduction (find the max inverse timestep in the block) - // then write it to that block's location in the dev_dti array - max_dti = reduction_utilities::blockReduceMax(max_dti); - if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti; + // do the grid wide reduction (find the max inverse timestep in the grid) + reduction_utilities::gridReduceMax(max_dti, dev_dti); } @@ -526,9 +524,6 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n Real max_dti = -DBL_MAX; Real d, d_inv, vx, vy, vz, E; - #ifdef MHD - Real avgBx, avgBy, avgBz; - #endif //MHD int xid, yid, zid, n_cells; n_cells = nx*ny*nz; @@ -553,14 +548,12 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n vy = dev_conserved[2*n_cells + id] * d_inv; vz = dev_conserved[3*n_cells + id] * d_inv; E = dev_conserved[4*n_cells + id]; - #ifdef MHD - // 
Compute the cell centered magnetic field using a straight average of - // the faces - mhdUtils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, avgBx, avgBy, avgBz); - #endif //MHD // Compute the maximum inverse crossing time in the cell #ifdef MHD + // Compute the cell centered magnetic field using a straight average of + // the faces + auto const [avgBx, avgBy, avgBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); max_dti = fmax(max_dti,mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma)); #else // not MHD max_dti = fmax(max_dti,hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma)); @@ -569,62 +562,45 @@ __global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n } } - // do the block wide reduction (find the max inverse timestep in the block) - // then write it to that block's location in the dev_dti array - max_dti = reduction_utilities::blockReduceMax(max_dti); - if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti; + // do the grid wide reduction (find the max inverse timestep in the grid) + reduction_utilities::gridReduceMax(max_dti, dev_dti); } Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma ) { - // set values for GPU kernels - uint threadsPerBlock, numBlocks; - int ngrid = (nx*ny*nz + TPB - 1 )/TPB; - // reduction_utilities::reductionLaunchParams(numBlocks, threadsPerBlock); // Uncomment this if we fix the AtomicDouble bug - Alwin - threadsPerBlock = TPB; - numBlocks = ngrid; - - Real* dev_dti = dev_dti_array; + // Allocate the device memory + cuda_utilities::DeviceVector static dev_dti(1); + // Set the device side inverse time step to the smallest possible double so + // that the reduction isn't using the maximum value of the previous iteration + dev_dti.assign(std::numeric_limits::lowest()); // compute dt and store in dev_dti if 
(nx > 1 && ny == 1 && nz == 1) //1D { - hipLaunchKernelGGL(Calc_dt_1D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, nx, dx); + // set launch parameters for GPU kernels. + cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_1D); + hipLaunchKernelGGL(Calc_dt_1D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + dev_conserved, dev_dti.data(), gamma, n_ghost, nx, dx); } else if (nx > 1 && ny > 1 && nz == 1) //2D { - hipLaunchKernelGGL(Calc_dt_2D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, nx, ny, dx, dy); + // set launch parameters for GPU kernels. + cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_2D); + hipLaunchKernelGGL(Calc_dt_2D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + dev_conserved, dev_dti.data(), gamma, n_ghost, nx, ny, dx, dy); } else if (nx > 1 && ny > 1 && nz > 1) //3D { - hipLaunchKernelGGL(Calc_dt_3D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); + // set launch parameters for GPU kernels. 
+ cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_3D); + hipLaunchKernelGGL(Calc_dt_3D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + dev_conserved, dev_dti.data(), gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); } CudaCheckError(); - Real max_dti=0; - - /* Uncomment the below if we fix the AtomicDouble bug - Alwin - // copy device side max_dti to host side max_dti - - - CudaSafeCall( cudaMemcpy(&max_dti, dev_dti, sizeof(Real), cudaMemcpyDeviceToHost) ); - cudaDeviceSynchronize(); - - return max_dti; - */ - - int dev_dti_length = numBlocks; - CudaSafeCall(cudaMemcpy(host_dti_array,dev_dti, dev_dti_length*sizeof(Real), cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); - - for (int i=0;i max_dti_slow){ speed = sqrt(vx*vx + vy*vy + vz*vz); temp = (gamma - 1)*(E - 0.5*(speed*speed)*d)*ENERGY_UNIT/(d*DENSITY_UNIT/0.6/MP)/KB; P = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); cs = sqrt(d_inv * gamma * P)*VELOCITY_UNIT*1e-5; // Average this cell - printf(" Average Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f, n=%.3e, T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n", xid, yid, zid, 1./max_dti, 1./max_dti_slow, + printf(" Average Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f, n=%.3e, T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n", xid, yid, zid, 1./max_dti, 1./max_dti_slow, dev_conserved[id]*DENSITY_UNIT/0.6/MP, temp, speed*VELOCITY_UNIT*1e-5, vx*VELOCITY_UNIT*1e-5, vy*VELOCITY_UNIT*1e-5, vz*VELOCITY_UNIT*1e-5, cs); Average_Cell_All_Fields( xid, yid, zid, nx, ny, nz, n_cells, n_fields, dev_conserved ); } } } -#endif //AVERAGE_SLOW_CELLS +#endif //AVERAGE_SLOW_CELLS #ifdef DE @@ -829,7 +793,12 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, //PRESSURE_DE E = dev_conserved[4*n_cells + id]; GE = dev_conserved[(n_fields-1)*n_cells + id]; - E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); + E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); + #ifdef MHD + // Add the 
magnetic energy + auto [centeredBx, centeredBy, centeredBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + E_kin += mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz); + #endif //MHD P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); P = fmax(P, (Real) TINY_NUMBER); @@ -1205,15 +1174,7 @@ __device__ void Average_Cell_All_Fields( int i, int j, int k, int nx, int ny, in Average_Cell_Single_Field( 3, i, j, k, nx, ny, nz, ncells, conserved ); // Average Energy Average_Cell_Single_Field( 4, i, j, k, nx, ny, nz, ncells, conserved ); - #ifdef MHD - // Average MHD - Average_Cell_Single_Field( 5+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 6+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 7+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 5+NSCALARS, i-1, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 6+NSCALARS, i, j-1, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 7+NSCALARS, i, j, k-1, nx, ny, nz, ncells, conserved ); - #endif //MHD + #ifdef DE // Average GasEnergy Average_Cell_Single_Field( n_fields-1, i, j, k, nx, ny, nz, ncells, conserved ); diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index a6d00e96b..eb9c3f9ed 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -19,6 +19,7 @@ #include "../global/global_cuda.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" +#include "../utils/DeviceVector.h" #include "../hydro/hydro_cuda.h" // Include code to test #if defined(CUDA) @@ -44,38 +45,31 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) Real dx = 1.0; Real dy = 1.0; Real dz = 1.0; - Real *host_conserved; - Real *dev_conserved; - Real *dev_dti_array; + std::vector host_conserved(n_fields); + cuda_utilities::DeviceVector dev_conserved(n_fields); + cuda_utilities::DeviceVector 
dev_dti(1); Real gamma = 5.0/3.0; - // Allocate host and device arrays and copy data - cudaHostAlloc(&host_conserved, n_fields*sizeof(Real), cudaHostAllocDefault); - CudaSafeCall(cudaMalloc(&dev_conserved, n_fields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&dev_dti_array, sizeof(Real))); - // Set values of conserved variables for input (host) - host_conserved[0] = 1.0; // density - host_conserved[1] = 0.0; // x momentum - host_conserved[2] = 0.0; // y momentum - host_conserved[3] = 0.0; // z momentum - host_conserved[4] = 1.0; // Energy + host_conserved.at(0) = 1.0; // density + host_conserved.at(1) = 0.0; // x momentum + host_conserved.at(2) = 0.0; // y momentum + host_conserved.at(3) = 0.0; // z momentum + host_conserved.at(4) = 1.0; // Energy // Copy host data to device arrray - CudaSafeCall(cudaMemcpy(dev_conserved, host_conserved, n_fields*sizeof(Real), cudaMemcpyHostToDevice)); - //__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz) + dev_conserved.cpyHostToDevice(host_conserved); // Run the kernel - hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_dti_array, gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); + hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, + dev_conserved.data(), dev_dti.data(), gamma, n_ghost, + n_fields, nx, ny, nz, dx, dy, dz); CudaCheckError(); - // Copy the dt value back from the GPU - CudaSafeCall(cudaMemcpy(testDt, dev_dti_array, sizeof(Real), cudaMemcpyDeviceToHost)); - // Compare results // Check for equality and if not equal return difference - double fiducialDt = 1.0540925533894598; - double testData = testDt[0]; + double const fiducialDt = 1.0540925533894598; + double const testData = dev_dti.at(0); double absoluteDiff; int64_t ulpsDiff; bool areEqual; diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 097708ede..2fb413870 100644 --- 
a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -1,8 +1,10 @@ /*! \file VL_3D_cuda.cu - * \brief Definitions of the cuda 3D VL algorithm functions. */ + * \brief Definitions of the cuda 3 D VL algorithm functions. MHD algorithm + * from Stone & Gardiner 2009 "A simple unsplit Godunov method for + * multidimensional MHD" + */ -#ifdef CUDA -#ifdef VL +#if defined(CUDA) && defined(VL) #include #include @@ -21,8 +23,11 @@ #include "../riemann_solvers/exact_cuda.h" #include "../riemann_solvers/roe_cuda.h" #include "../riemann_solvers/hllc_cuda.h" -#include "../io/io.h" #include "../riemann_solvers/hll_cuda.h" +#include "../riemann_solvers/hlld_cuda.h" +#include "../mhd/ct_electric_fields.h" +#include "../mhd/magnetic_update.h" +#include "../io/io.h" __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, Real density_floor); @@ -33,7 +38,6 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, Real U_floor, Real *host_grav_potential ) { - //Here, *dev_conserved contains the entire //set of conserved variables on the grid //concatenated into a 1-d array @@ -53,42 +57,86 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int if ( !memory_allocated ){ // allocate memory on the GPU - //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); dev_conserved = d_conserved; + + // Set the size of the interface and flux arrays + #ifdef MHD + // In MHD/Constrained Transport the interface arrays have one fewer fields + // since the magnetic field that is stored on the face does not require + // reconstructions. 
Similarly the fluxes have one fewer fields since the + // magnetic field on that face doesn't have an associated flux. Each + // interface array stores the magnetic fields on that interface that are + // not perpendicular to the interface and arranged cyclically. I.e. the + // `Q_Lx` interface stores the reconstructed Y and Z magnetic fields in + // that order, the `Q_Ly` interface stores the Z and X magnetic fields in + // that order, and the `Q_Lz` interface stores the X and Y magnetic fields + // in that order. These fields can be indexed with the Q_?_dir grid_enums. The interface state arrays + // store the interface states on the "right" side of the cell, so the flux + // arrays store the fluxes through the right interface + // + // According to Stone et al. 2008 section 5.3 and the source code of + // Athena, the following equations relate the magnetic flux to the face + // centered electric fields/EMF. -cross(V,B)x is the negative of the + // x-component of V cross B. Note that "X" is the direction the solver is + // running in this case, not necessarily the true "X". 
+ // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z + // F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y + // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VyBz - ByVz = -(-cross(V,B))x = -EMF_X + // F_y[(grid_enum::fluxY_magnetic_z)*n_cells] = VyBx - ByVx = (-cross(V,B))z = EMF_Z + // F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VzBx - BzVx = -(-cross(V,B))y = -EMF_Y + // F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] = VzBy - BzVy = (-cross(V,B))x = EMF_X + size_t const arraySize = (n_fields-1) * n_cells * sizeof(Real); + size_t const ctArraySize = 3 * n_cells * sizeof(Real); + #else // not MHD + size_t const arraySize = n_fields*n_cells*sizeof(Real); + #endif //MHD CudaSafeCall( cudaMalloc((void**)&dev_conserved_half, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ly, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ry, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lz, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rz, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_y, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_z, n_fields*n_cells*sizeof(Real)) ); + CudaSafeCall( cudaMalloc((void**)&Q_Lx, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&Q_Rx, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&Q_Ly, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&Q_Ry, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&Q_Lz, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&Q_Rz, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&F_x, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&F_y, arraySize) ); + CudaSafeCall( cudaMalloc((void**)&F_z, 
arraySize) ); + + cuda_utilities::initGpuMemory(dev_conserved_half, n_fields*n_cells*sizeof(Real)); + cuda_utilities::initGpuMemory(Q_Lx, arraySize); + cuda_utilities::initGpuMemory(Q_Rx, arraySize); + cuda_utilities::initGpuMemory(Q_Ly, arraySize); + cuda_utilities::initGpuMemory(Q_Ry, arraySize); + cuda_utilities::initGpuMemory(Q_Lz, arraySize); + cuda_utilities::initGpuMemory(Q_Rz, arraySize); + cuda_utilities::initGpuMemory(F_x, arraySize); + cuda_utilities::initGpuMemory(F_y, arraySize); + cuda_utilities::initGpuMemory(F_z, arraySize); + + #ifdef MHD + CudaSafeCall( cudaMalloc((void**)&ctElectricFields, ctArraySize) ); + cuda_utilities::initGpuMemory(ctElectricFields, ctArraySize); + #endif //MHD #if defined( GRAVITY ) - // CudaSafeCall( cudaMalloc((void**)&dev_grav_potential, n_cells*sizeof(Real)) ); dev_grav_potential = d_grav_potential; - #else + #else // not GRAVITY dev_grav_potential = NULL; - #endif + #endif //GRAVITY // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. 
memory_allocated = true; - } #if defined( GRAVITY ) && !defined( GRAVITY_GPU ) CudaSafeCall( cudaMemcpy(dev_grav_potential, temp_potential, n_cells*sizeof(Real), cudaMemcpyHostToDevice) ); - #endif + #endif //GRAVITY and GRAVITY_GPU // Step 1: Use PCM reconstruction to put primitive variables into interface arrays hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); CudaCheckError(); - // Step 2: Calculate first-order upwind fluxes #ifdef EXACT hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); @@ -110,18 +158,32 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif //HLL + #ifdef HLLD + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, &(dev_conserved[(grid_enum::magnetic_x) * n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, &(dev_conserved[(grid_enum::magnetic_y) * n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved[(grid_enum::magnetic_z) * n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif //HLLD CudaCheckError(); + #ifdef MHD + // Step 2.5: Compute the Constrained transport electric fields + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved, ctElectricFields, nx, ny, nz, n_cells); + CudaCheckError(); + #endif //MHD // Step 3: Update the 
conserved variables half a timestep hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5*dt, gama, n_fields, density_floor ); CudaCheckError(); - + #ifdef MHD + // Update the magnetic fields + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells, 0.5*dt, dx, dy, dz); + CudaCheckError(); + #endif //MHD // Step 4: Construct left and right interface values using updated conserved variables #ifdef PCM hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); - #endif + #endif //PCM #ifdef PLMP hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); @@ -131,7 +193,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif + #endif //PLMC #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); @@ -166,29 +228,46 @@ void VL_Algorithm_3D_CUDA(Real 
*d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); #endif //HLLC + #ifdef HLLD + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, &(dev_conserved_half[(grid_enum::magnetic_x) * n_cells]), F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, &(dev_conserved_half[(grid_enum::magnetic_y) * n_cells]), F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, &(dev_conserved_half[(grid_enum::magnetic_z) * n_cells]), F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif //HLLD CudaCheckError(); #ifdef DE // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables_3D hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields ); CudaCheckError(); - #endif + #endif //DE + #ifdef MHD + // Step 5.5: Compute the Constrained transport electric fields + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dim1dGrid, dim1dBlock, 0, 0, F_x, F_y, F_z, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells); + CudaCheckError(); + #endif //MHD // Step 6: Update the conserved variable array hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, gama, n_fields, density_floor, dev_grav_potential); 
CudaCheckError(); + #ifdef MHD + // Update the magnetic fields + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved, ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz); + CudaCheckError(); + #endif //MHD + + #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); CudaCheckError(); - #endif + #endif //DE #ifdef TEMPERATURE_FLOOR hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor ); CudaCheckError(); #endif //TEMPERATURE_FLOOR + return; } @@ -208,6 +287,7 @@ void Free_Memory_VL_3D(){ cudaFree(F_x); cudaFree(F_y); cudaFree(F_z); + cudaFree(ctElectricFields); } @@ -233,11 +313,11 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, vy_jmo, vy_jpo, vz_kmo, vz_kpo, P, E, E_kin, GE; int ipo, jpo, kpo; - #endif + #endif //DE #ifdef DENSITY_FLOOR Real dens_0; - #endif + #endif //DENSITY_FLOOR // threads corresponding to all cells except outer ring of ghost cells do the calculation if (xid > 0 && xid < nx-1 && yid > 0 && yid < ny-1 && zid > 0 && zid < nz-1) @@ -251,7 +331,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de //PRESSURE_DE E = dev_conserved[4*n_cells + id]; GE = dev_conserved[(n_fields-1)*n_cells + id]; - E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); + E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); + #ifdef MHD + // Add the magnetic energy + auto const [centeredBx, centeredBy, centeredBz] = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + E_kin += mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz); + #endif //MHD P = 
hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); P = fmax(P, (Real) TINY_NUMBER); // P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); @@ -266,7 +351,7 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo]; vz_kmo = dev_conserved[3*n_cells + kmo] / dev_conserved[kmo]; vz_kpo = dev_conserved[3*n_cells + kpo] / dev_conserved[kpo]; - #endif + #endif //DE // update the conserved variable array dev_conserved_half[ id] = dev_conserved[ id] @@ -296,14 +381,14 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de + dtody * (dev_F_y[(5+i)*n_cells + jmo] - dev_F_y[(5+i)*n_cells + id]) + dtodz * (dev_F_z[(5+i)*n_cells + kmo] - dev_F_z[(5+i)*n_cells + id]); } - #endif + #endif //SCALAR #ifdef DE dev_conserved_half[(n_fields-1)*n_cells + id] = dev_conserved[(n_fields-1)*n_cells + id] + dtodx * (dev_F_x[(n_fields-1)*n_cells + imo] - dev_F_x[(n_fields-1)*n_cells + id]) + dtody * (dev_F_y[(n_fields-1)*n_cells + jmo] - dev_F_y[(n_fields-1)*n_cells + id]) + dtodz * (dev_F_z[(n_fields-1)*n_cells + kmo] - dev_F_z[(n_fields-1)*n_cells + id]) + 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) + dtodz*(vz_kmo-vz_kpo)); - #endif + #endif //DE #ifdef DENSITY_FLOOR if ( dev_conserved_half[ id] < density_floor ){ @@ -317,19 +402,11 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de dev_conserved_half[4*n_cells + id] *= (density_floor / dens_0); #ifdef DE dev_conserved_half[(n_fields-1)*n_cells + id] *= (density_floor / dens_0); - #endif + #endif //DE } - #endif - //if (dev_conserved_half[id] < 0.0 || dev_conserved_half[id] != dev_conserved_half[id] || dev_conserved_half[4*n_cells+id] < 0.0 || dev_conserved_half[4*n_cells+id] != dev_conserved_half[4*n_cells+id]) { - //printf("%3d %3d %3d Thread crashed in half step update. 
d: %e E: %e\n", xid, yid, zid, dev_conserved_half[id], dev_conserved_half[4*n_cells+id]); - //} - + #endif //DENSITY_FLOOR } } - - - -#endif //VL -#endif //CUDA +#endif //CUDA and VL diff --git a/src/io/io.h b/src/io/io.h index f7dfe6eb7..08489f7da 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -3,6 +3,8 @@ #include "../global/global.h" #include "../grid/grid3D.h" #include <iostream> +#include <iomanip> +#include <sstream> /* Write the data */ @@ -26,6 +28,23 @@ void OutputSlices(Grid3D &G, struct parameters P, int nfile); /* MPI-safe printf routine */ int chprintf(const char * __restrict sdata, ...); +/*! + * \brief Convert a floating point number to a string such that it can be + * exactly deserialized back from a string to the same floating point number. + * + * \tparam T Any floating point type + * \param[in] input The floating point number to convert + * \return std::string The string representation of the input floating point + */ +template <typename T> +std::string to_string_exact(T const &input) +{ + std::stringstream output; + output << std::setprecision(std::numeric_limits<T>::max_digits10); + output << input; + return output.str(); +} + void Create_Log_File( struct parameters P ); void Write_Message_To_Log_File( const char* message ); diff --git a/src/main.cpp b/src/main.cpp index da2348858..9e59bd651 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -17,11 +17,14 @@ #include "particles/supernova.h" #ifdef ANALYSIS #include "analysis/feedback_analysis.h" -#endif +#endif #endif //SUPERNOVA #ifdef STAR_FORMATION #include "particles/star_formation.h" #endif +#ifdef MHD +#include "mhd/magnetic_divergence.h" +#endif //MHD #include "grid/grid_enum.h" @@ -53,7 +56,6 @@ int main(int argc, char *argv[]) int nfile = 0; // number of output files Real outtime = 0; // current output time - // read in command line arguments if (argc < 2) { @@ -191,6 +193,12 @@ int main(int argc, char *argv[]) // add one to the output file count nfile++; #endif //OUTPUT + + #ifdef MHD + // Check that the initial magnetic field has 
zero divergence + mhd::checkMagneticDivergence(G); + #endif //MHD + // increment the next output time outtime += P.outstep; @@ -254,7 +262,7 @@ int main(int argc, char *argv[]) //Set the Grid boundary conditions for next time step G.Set_Boundary_Conditions_Grid(P); - + #ifdef GRAVITY_ANALYTIC_COMP G.Add_Analytic_Potential(); #endif @@ -266,7 +274,7 @@ int main(int argc, char *argv[]) #ifdef STAR_FORMATION star_formation::Star_Formation(G); - #endif + #endif #ifdef CPU_TIME G.Timer.Total.End(); @@ -332,6 +340,10 @@ int main(int argc, char *argv[]) } #endif + #ifdef MHD + // Check that the magnetic field has zero divergence + mhd::checkMagneticDivergence(G); + #endif //MHD } /*end loop over timesteps*/ diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu new file mode 100644 index 000000000..542dda3a7 --- /dev/null +++ b/src/mhd/ct_electric_fields.cu @@ -0,0 +1,346 @@ +/*! + * \file ct_electric_fields.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains implementation for the CT electric fields code. 
Method from + * Stone & Gardiner 2009 "A simple unsplit Godunov method for multidimensional + * MHD" hereafter referred to as "S&G 2009" + * + */ + +// STL Includes + +// External Includes + +// Local Includes +#include "../mhd/ct_electric_fields.h" +#ifdef MHD +namespace mhd +{ + // ========================================================================= + __global__ void Calculate_CT_Electric_Fields(Real const *fluxX, + Real const *fluxY, + Real const *fluxZ, + Real const *dev_conserved, + Real *ctElectricFields, + int const nx, + int const ny, + int const nz, + int const n_cells) + { + // get a thread index + int const threadId = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); + + // Thread guard to avoid overrun and to skip the first two cells since + // those ghost cells can't be reconstructed + if ( xid > 1 + and yid > 1 + and zid > 1 + and xid < nx + and yid < ny + and zid < nz) + { + // According to Stone et al. 2008 section 5.3 and the source code of + // Athena, the following equation relate the magnetic flux to the + // face centered electric fields/EMF. -cross(V,B)x is the negative + // of the x-component of V cross B. Note that "X" is the direction + // the solver is running in this case, not necessarily the true "X". 
+ // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Z + // F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Y + // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_X + // F_y[(grid_enum::fluxY_magnetic_z)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_Z + // F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VxBy - BxVy = -(-cross(V,B))z = -EMF_Y + // F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] = VxBz - BxVz = (-cross(V,B))y = EMF_X + + // Notes on Implementation Details + // - The density flux has the same sign as the velocity on the face + // and we only care about the sign so we're using the density flux + // to perform upwinding checks + // - All slopes are computed without the factor of two shown in + // Stone & Gardiner 2008 eqn. 24. That factor of two is taken care + // of in the final assembly of the electric field + + // Variable to get the sign of the velocity at the interface. + Real signUpwind; + + // Slope and face variables. Format is + // "__". Slope/Face indicates if the + // value is a slope or a face centered EMF, direction indicates the + // direction of the derivative/face and pos/neg indicates if it's + // the slope on the positive or negative side of the edge field + // being computed. Note that the direction for the face is parallel + // to the face and the other direction that is parallel to that face + // is the direction of the electric field being calculated + Real slope_x_pos, slope_x_neg, + slope_y_pos, slope_y_neg, + slope_z_pos, slope_z_neg, + face_x_pos, face_x_neg, + face_y_pos, face_y_neg, + face_z_pos, face_z_neg; + // ================ + // X electric field + // ================ + + // Y-direction slope on the positive Y side. 
S&G 2009 equation 23 + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_y_pos = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Y-direction slope on the negative Y side. S&G 2009 equation 23 + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid-1, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_y_neg = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the positive Z side. 
S&G 2009 equation 23 + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_z_pos = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the negative Z side. S&G 2009 equation 23 + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_z_neg = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells)); + } + + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field + + face_y_pos = + fluxZ[cuda_utilities::compute1DIndex(xid , yid , zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells]; + face_y_neg = + fluxZ[cuda_utilities::compute1DIndex(xid , yid-1, zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells]; + face_z_pos = - fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid , nx, ny) + (grid_enum::fluxY_magnetic_x)*n_cells]; + face_z_neg = - fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid-1, nx, ny) + (grid_enum::fluxY_magnetic_x)*n_cells]; + + // sum and average face centered electric 
fields and slopes to get the + // edge averaged electric field. + // S&G 2009 equation 22 + ctElectricFields[threadId + 0*n_cells] = 0.25 * (+ face_y_pos + + face_y_neg + + face_z_pos + + face_z_neg + + slope_y_pos + + slope_y_neg + + slope_z_pos + + slope_z_neg); + + // ================ + // Y electric field + // ================ + + // X-direction slope on the positive X side. S&G 2009 equation 23 + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_x_pos = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // X-direction slope on the negative X side. S&G 2009 equation 23 + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_x_neg = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the positive Z side. 
S&G 2009 equation 23 + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_z_pos = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_z_pos = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the negative Z side. S&G 2009 equation 23 + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny)]; + if (signUpwind > 0.0) + { + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_z_neg = mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_z_neg = 0.5 * (mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field + face_x_pos = - fluxZ[cuda_utilities::compute1DIndex(xid , yid, zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells]; + face_x_neg = - fluxZ[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells]; + face_z_pos = + fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid , nx, ny) + (grid_enum::fluxX_magnetic_y)*n_cells]; + face_z_neg = + fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid-1, nx, ny) + (grid_enum::fluxX_magnetic_y)*n_cells]; + + // sum and average face centered electric 
fields and slopes to get the + // edge averaged electric field. + // S&G 2009 equation 22 + ctElectricFields[threadId + 1*n_cells] = 0.25 * (+ face_x_pos + + face_x_neg + + face_z_pos + + face_z_neg + + slope_x_pos + + slope_x_neg + + slope_z_pos + + slope_z_neg); + + // ================ + // Z electric field + // ================ + + // Y-direction slope on the positive Y side. S&G 2009 equation 23 + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_y_pos = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_y_pos = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Y-direction slope on the negative Y side. S&G 2009 equation 23 + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_y_neg = mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_y_neg = 0.5 * (mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // X-direction slope on the positive X side. 
S&G 2009 equation 23 + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_x_pos = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_x_pos = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // X-direction slope on the negative X side. S&G 2009 equation 23 + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny)]; + if (signUpwind > 0.0) + { + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } + else if (signUpwind < 0.0) + { + slope_x_neg = mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } + else + { + slope_x_neg = 0.5 * (mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::_internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field + face_x_pos = + fluxY[cuda_utilities::compute1DIndex(xid , yid-1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells]; + face_x_neg = + fluxY[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells]; + face_y_pos = - fluxX[cuda_utilities::compute1DIndex(xid-1, yid , zid, nx, ny) + (grid_enum::fluxX_magnetic_z)*n_cells]; + face_y_neg = - fluxX[cuda_utilities::compute1DIndex(xid-1, yid-1, zid, nx, ny) + (grid_enum::fluxX_magnetic_z)*n_cells]; + + // sum and average face centered electric 
fields and slopes to get the + // edge averaged electric field. + // S&G 2009 equation 22 + ctElectricFields[threadId + 2*n_cells] = 0.25 * (+ face_x_pos + + face_x_neg + + face_y_pos + + face_y_neg + + slope_x_pos + + slope_x_neg + + slope_y_pos + + slope_y_neg); + } + } + // ========================================================================= +} // end namespace mhd +#endif // MHD diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h new file mode 100644 index 000000000..9ba8780ee --- /dev/null +++ b/src/mhd/ct_electric_fields.h @@ -0,0 +1,157 @@ +/*! + * \file ct_electric_fields.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the declaration for the kernel that computes the CT electric + * fields. Method from Stone & Gardiner 2009 "A simple unsplit Godunov method + * for multidimensional MHD" hereafter referred to as "S&G 2009" + * + */ + +#pragma once + +// STL Includes + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/cuda_utilities.h" + +#ifdef MHD +namespace mhd +{ + /*! + * \brief Namespace for functions required by functions within the mhd + * namespace. Everything in this name space should be regarded as private + * but is made accesible for testing + * + */ + namespace _internal + { + // ===================================================================== + /*! + * \brief Compute and return the slope of the electric field used to + * compute the CT electric fields. This function implements S&G 2009 + * equation 24 + * + * \param[in] flux The flux array + * \param[in] dev_conserved The conserved variable array + * \param[in] fluxSign The sign of the flux to convert it to magnetic + * field. Also serves to choose which magnetic flux is used, i.e. 
the Y
 * or Z field
 * \param[in] ctDirection The direction of the CT field that this slope
 * will be used to calculate
 * \param[in] conservedQuadrent1 Which index should be reduced by one to get
 * the correct conserved variables. Options are -1 for no reduction, 0 for
 * reducing xid, 1 for reducing yid, and 2 for reducing zid
 * \param[in] conservedQuadrent2 Which index should be reduced by one to get
 * the correct conserved variables. Options are -1 for no reduction, 0 for
 * reducing xid, 1 for reducing yid, and 2 for reducing zid
 * \param[in] fluxQuadrent1 Which index should be reduced by one to get the
 * correct flux variable. Options are -1 for no reduction, 0 for reducing
 * xid, 1 for reducing yid, and 2 for reducing zid
 * \param[in] fluxQuadrent2 Which index should be reduced by one to get the
 * correct flux variable. Options are -1 for no reduction, 0 for reducing
 * xid, 1 for reducing yid, and 2 for reducing zid
 * \param[in] xid The x index
 * \param[in] yid The y index
 * \param[in] zid The z index
 * \param[in] nx The number of cells in the x-direction
 * \param[in] ny The number of cells in the y-direction
 * \param[in] n_cells The total number of cells
 * \return Real The slope of the electric field
 */
// NOTE(review): "Quadrent" is a misspelling of "Quadrant"; it is kept here
// because it matches the declared parameter names used throughout this file.
inline __host__ __device__ Real _ctSlope(Real const *flux,
                                         Real const *dev_conserved,
                                         Real const &fluxSign,
                                         int const &ctDirection,
                                         int const &conservedQuadrent1,
                                         int const &conservedQuadrent2,
                                         int const &fluxQuadrent1,
                                         int const &fluxQuadrent2,
                                         int const &xid,
                                         int const &yid,
                                         int const &zid,
                                         int const &nx,
                                         int const &ny,
                                         int const &n_cells)
{
  // Compute the various required indices

  // Get the shifted modulos of the ctDirection. Cyclically permuting the
  // direction selects the two transverse components (e.g. for ctDirection=x
  // these pick out the y and z components).
  int const modPlus1 = (ctDirection + 1) % 3;
  int const modPlus2 = (ctDirection + 2) % 3;

  // Indices for the cell centered values; each "Quadrent" argument equal to
  // an axis number shifts that axis down by one cell.
  int const xidCentered = xid - int(conservedQuadrent1 == 0) - int(conservedQuadrent2 == 0);
  int const yidCentered = yid - int(conservedQuadrent1 == 1) - int(conservedQuadrent2 == 1);
  int const zidCentered = zid - int(conservedQuadrent1 == 2) - int(conservedQuadrent2 == 2);
  int const idxCentered = cuda_utilities::compute1DIndex(xidCentered, yidCentered, zidCentered, nx, ny);

  // Index for the flux, shifted the same way by the flux "Quadrent" arguments
  int const idxFlux = cuda_utilities::compute1DIndex(xid - int(fluxQuadrent1 == 0) - int(fluxQuadrent2 == 0),
                                                     yid - int(fluxQuadrent1 == 1) - int(fluxQuadrent2 == 1),
                                                     zid - int(fluxQuadrent1 == 2) - int(fluxQuadrent2 == 2),
                                                     nx, ny);

  // Indices for the face centered magnetic fields that need to be averaged
  int const idxB2Shift = cuda_utilities::compute1DIndex(xidCentered - int(modPlus1 == 0),
                                                        yidCentered - int(modPlus1 == 1),
                                                        zidCentered - int(modPlus1 == 2),
                                                        nx, ny);
  int const idxB3Shift = cuda_utilities::compute1DIndex(xidCentered - int(modPlus2 == 0),
                                                        yidCentered - int(modPlus2 == 1),
                                                        zidCentered - int(modPlus2 == 2),
                                                        nx, ny);

  // Load values for cell centered electric field. B1 (not present) is
  // the magnetic field in the same direction as the `ctDirection`
  // variable, B2 and B3 are the next two fields cyclically. i.e. if
  // B1=Bx then B2=By and B3=Bz, if B1=By then B2=Bz and B3=Bx. The
  // same rules apply for the momentum
  Real const density    = dev_conserved[idxCentered];
  Real const Momentum2  = dev_conserved[idxCentered + (modPlus1 + 1) * n_cells];
  Real const Momentum3  = dev_conserved[idxCentered + (modPlus2 + 1) * n_cells];
  Real const B2Centered = 0.5 * (dev_conserved[idxCentered + (modPlus1 + grid_enum::magnetic_start) * n_cells]
                               + dev_conserved[idxB2Shift  + (modPlus1 + grid_enum::magnetic_start) * n_cells]);
  Real const B3Centered = 0.5 * (dev_conserved[idxCentered + (modPlus2 + grid_enum::magnetic_start) * n_cells]
                               + dev_conserved[idxB3Shift  + (modPlus2 + grid_enum::magnetic_start) * n_cells]);

  // Compute the electric field in the center with a cross product.
  // NOTE(review): division by `density` — produces NaN/Inf if the density
  // is zero, as warned in the documentation of the calling kernel.
  Real const electric_centered = (Momentum3 * B2Centered - Momentum2 * B3Centered) / density;

  // Load face centered electric field, note fluxSign to correctly do
  // the shift from magnetic flux to EMF/electric field and to choose
  // which field to use
  Real const electric_face = fluxSign * flux[idxFlux + (int(fluxSign == 1) + grid_enum::magnetic_start) * n_cells];

  // Compute the slope and return it
  // S&G 2009 equation 24
  return electric_face - electric_centered;
}
// =====================================================================
}  // mhd::_internal namespace

// =========================================================================
/*!
 * \brief Compute the Constrained Transport electric fields used to evolve
 * the magnetic field. Note that this function requires that the density be
 * non-zero or it will return NaNs.
 *
 * \param[in] fluxX The flux on the x+1/2 face of each cell
 * \param[in] fluxY The flux on the y+1/2 face of each cell
 * \param[in] fluxZ The flux on the z+1/2 face of each cell
 * \param[in] dev_conserved The device resident grid
 * \param[out] ctElectricFields The CT electric fields
 * \param[in] nx The number of cells in the x-direction
 * \param[in] ny The number of cells in the y-direction
 * \param[in] nz The number of cells in the z-direction
 * \param[in] n_cells The total number of cells
 */
__global__ void Calculate_CT_Electric_Fields(Real const *fluxX,
                                             Real const *fluxY,
                                             Real const *fluxZ,
                                             Real const *dev_conserved,
                                             Real *ctElectricFields,
                                             int const nx,
                                             int const ny,
                                             int const nz,
                                             int const n_cells);
// =========================================================================
}  // end namespace mhd
#endif  // MHD

/*!
 * \file ct_electric_fields_tests.cu
 * \author Robert 'Bob' Caddy (rvc@pitt.edu)
 * \brief Tests for the CT electric fields
 *
 */

// STL Includes
// NOTE(review): the bracketed header names were lost in this paste; the
// standard-library names used below require at least these headers — confirm
// against the original file.
#include <vector>
#include <string>
#include <numeric>
#include <iterator>
#include <algorithm>

// External Includes
#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers

// Local Includes
#include "../utils/testing_utilities.h"
#include "../mhd/ct_electric_fields.h"
#include "../global/global.h"

#ifdef MHD
// =============================================================================
// Tests for the mhd::Calculate_CT_Electric_Fields kernel
// =============================================================================

// =============================================================================
/*!
 * \brief Test fixture for tMHDCalculateCTElectricFields test suite
 *
 */
class tMHDCalculateCTElectricFields : public ::testing::Test
{
public:

  /*!
   * \brief Initialize and allocate all the various required variables and
   * arrays
   *
   */
  tMHDCalculateCTElectricFields()
      :
      nx(3),
      ny(nx),
      nz(nx),
      n_cells(nx*ny*nz),
      fluxX(n_cells * (grid_enum::num_flux_fields)),
      fluxY(n_cells * (grid_enum::num_flux_fields)),
      fluxZ(n_cells * (grid_enum::num_flux_fields)),
      grid (n_cells * (grid_enum::num_fields)),
      testCTElectricFields(n_cells * 3, -999.),
      fiducialData(n_cells * 3, -999.),
      dimGrid((n_cells + TPB - 1),1,1),
      dimBlock(TPB,1,1)
  {
    // NOTE(review): members are constructed in declaration order, so dimGrid
    // and dimBlock are initialized before the vectors despite appearing last
    // in this list; they depend only on n_cells so this is safe, but the
    // ordering will trigger -Wreorder.

    // Allocate device arrays
    CudaSafeCall ( cudaMalloc(&dev_fluxX, fluxX.size()*sizeof(double)) );
    CudaSafeCall ( cudaMalloc(&dev_fluxY, fluxY.size()*sizeof(double)) );
    CudaSafeCall ( cudaMalloc(&dev_fluxZ, fluxZ.size()*sizeof(double)) );
    CudaSafeCall ( cudaMalloc(&dev_grid,  grid.size()*sizeof(double)) );
    CudaSafeCall ( cudaMalloc(&dev_testCTElectricFields, testCTElectricFields.size()*sizeof(double)) );

    // Populate the grids with values where vector.at(i) = double(i). The
    // values chosen aren't that important, just that every cell has a unique
    // value
    std::iota(std::begin(fluxX), std::end(fluxX), 0.);
    std::iota(std::begin(fluxY), std::end(fluxY), fluxX.back() + 1);
    std::iota(std::begin(fluxZ), std::end(fluxZ), fluxY.back() + 1);
    std::iota(std::begin(grid),  std::end(grid),  fluxZ.back() + 1);
  }
  ~tMHDCalculateCTElectricFields() = default;
protected:
  // Initialize the test grid and other state variables
  size_t const nx, ny, nz;
  size_t const n_cells;

  // Launch Parameters
  dim3 const dimGrid;   // How many blocks in the grid
  dim3 const dimBlock;  // How many threads per block

  // Make sure the vector is large enough that the locations where the
  // magnetic field would be in the real grid are filled
  // NOTE(review): the element type <double> was lost in this paste and is
  // reconstructed from the sizeof(double) allocations above — confirm.
  std::vector<double> fluxX;
  std::vector<double> fluxY;
  std::vector<double> fluxZ;
  std::vector<double> grid;
  std::vector<double> testCTElectricFields;
  std::vector<double> fiducialData;

  // device pointers
  double *dev_fluxX, *dev_fluxY, *dev_fluxZ, *dev_grid, *dev_testCTElectricFields;

  /*!
   * \brief Launch the kernel and check results
   *
   */
  void runTest()
  {
    // Copy values to GPU
    CudaSafeCall( cudaMemcpy(dev_fluxX, fluxX.data(), fluxX.size()*sizeof(Real), cudaMemcpyHostToDevice) );
    CudaSafeCall( cudaMemcpy(dev_fluxY, fluxY.data(), fluxY.size()*sizeof(Real), cudaMemcpyHostToDevice) );
    CudaSafeCall( cudaMemcpy(dev_fluxZ, fluxZ.data(), fluxZ.size()*sizeof(Real), cudaMemcpyHostToDevice) );
    CudaSafeCall( cudaMemcpy(dev_grid,  grid.data(),  grid.size()*sizeof(Real),  cudaMemcpyHostToDevice) );
    CudaSafeCall( cudaMemcpy(dev_testCTElectricFields,
                             testCTElectricFields.data(),
                             testCTElectricFields.size()*sizeof(Real),
                             cudaMemcpyHostToDevice) );

    // Call the kernel to test
    hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields,
                       dimGrid,
                       dimBlock,
                       0,
                       0,
                       dev_fluxX,
                       dev_fluxY,
                       dev_fluxZ,
                       dev_grid,
                       dev_testCTElectricFields,
                       nx,
                       ny,
                       nz,
                       n_cells);
    CudaCheckError();

    // Copy test data back
    CudaSafeCall( cudaMemcpy(testCTElectricFields.data(),
                             dev_testCTElectricFields,
                             testCTElectricFields.size()*sizeof(Real),
                             cudaMemcpyDeviceToHost) );
    cudaDeviceSynchronize();

    // Check the results
    for (size_t i = 0; i < fiducialData.size(); i++)
    {
      int xid, yid, zid;
      cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid);
      testingUtilities::checkResults(fiducialData.at(i),
                                     testCTElectricFields.at(i),
                                     "value at i = " + std::to_string(i)
                                     + ", xid = "    + std::to_string(xid)
                                     + ", yid = "    + std::to_string(yid)
                                     + ", zid = "    + std::to_string(zid));
    }
  }
};
// =============================================================================

// =============================================================================
TEST_F(tMHDCalculateCTElectricFields,
       PositiveVelocityExpectCorrectOutput)
{
  // Fiducial values
  fiducialData.at(26) = 206.29859653255295;
  fiducialData.at(53) = -334.90052254763339;
  fiducialData.at(80) = 209.53472440298236;

  // Launch kernel and check results
  runTest();
}
// =============================================================================

// =============================================================================
TEST_F(tMHDCalculateCTElectricFields,
       NegativeVelocityExpectCorrectOutput)
{
  // Fiducial values
  fiducialData.at(26) = 203.35149422304994;
  fiducialData.at(53) = -330.9860399765279;
  fiducialData.at(80) = 208.55149905461991;

  // Set the density fluxes to be negative to indicate a negative velocity
  // across the face
  for (size_t i = 0; i < n_cells; i++)
  {
    fluxX.at(i) = -fluxX.at(i);
    fluxY.at(i) = -fluxY.at(i);
    fluxZ.at(i) = -fluxZ.at(i);
  }

  // Launch kernel and check results
  runTest();
}
// =============================================================================

// =============================================================================
TEST_F(tMHDCalculateCTElectricFields,
       ZeroVelocityExpectCorrectOutput)
{
  // Fiducial values
  fiducialData.at(26) = 204.82504537780144;
  fiducialData.at(53) = -332.94328126208063;
  fiducialData.at(80) = 209.04311172880114;

  // Set the density fluxes to zero to indicate no velocity across the face
  for (size_t i = 0; i < n_cells; i++)
  {
    fluxX.at(i) = 0.0;
    fluxY.at(i) = 0.0;
    fluxZ.at(i) = 0.0;
  }

  // Launch kernel and check results
  runTest();
}
// =============================================================================
#endif  // MHD

/*!
 * \file magnetic_divergence.cu
 * \author Robert 'Bob' Caddy (rvc@pitt.edu)
 * \brief Contains the implementation of various utility functions for MHD and
 * for the various kernels, functions, and tools required for the 3D VL+CT MHD
 * integrator.
 * Due to the CUDA/HIP compiler requiring that device functions be
 * directly accessible to the file they're used in most device functions will be
 * implemented in the header file. Uses the same method described in Stone et
 * al. 2008 "ATHENA: A new code for astrophysical MHD", hereafter referred to as
 * Stone et al. 2008
 *
 */

// STL Includes
// NOTE(review): the bracketed header names were lost in this paste; <limits>
// is required for std::numeric_limits below — confirm against the original.
#include <limits>
#include <cmath>

// External Includes

// Local Includes
#include "../grid/grid3D.h"
#include "../io/io.h"
#include "../mhd/magnetic_divergence.h"
#include "../utils/cuda_utilities.h"
#include "../utils/reduction_utilities.h"
#include "../utils/DeviceVector.h"
#include "../utils/error_handling.h"
#ifdef MHD

namespace mhd
{
  // =========================================================================
  __global__ void calculateMagneticDivergence(Real const *dev_conserved,
                                              Real *dev_maxDivergence,
                                              Real const dx,
                                              Real const dy,
                                              Real const dz,
                                              int const nx,
                                              int const ny,
                                              int const nz,
                                              int const n_cells)
  {
    // Variables to store the divergence
    Real cellDivergence;
    Real maxDivergence = 0.0;  // per-thread running maximum of |div B|

    // Index variables
    int xid, yid, zid, id_xMin1, id_yMin1, id_zMin1;

    // Grid stride loop to perform as much of the reduction as possible
    for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x)
    {
      // compute the real indices
      cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid);

      // Thread guard to avoid overrun and to skip ghost cells that cannot
      // have their divergences computed due to a missing face;
      // NOTE(review): the lower bound is `> 1` while the difference below
      // only needs index-1 >= 0 — presumably intentional to skip ghost
      // layers; confirm against the grid's ghost-cell layout.
      if ( xid > 1 and yid > 1 and zid > 1
           and xid < nx and yid < ny and zid < nz)
      {
        // Compute the various offset indices
        id_xMin1 = cuda_utilities::compute1DIndex(xid-1, yid  , zid  , nx, ny);
        id_yMin1 = cuda_utilities::compute1DIndex(xid  , yid-1, zid  , nx, ny);
        id_zMin1 = cuda_utilities::compute1DIndex(xid  , yid  , zid-1, nx, ny);

        // Compute divergence
        // Stone et al. 2008 equation 25
        cellDivergence =
            (( dev_conserved[id       + (grid_enum::magnetic_x)*n_cells]
             - dev_conserved[id_xMin1 + (grid_enum::magnetic_x)*n_cells])
             / dx)
          + (( dev_conserved[id       + (grid_enum::magnetic_y)*n_cells]
             - dev_conserved[id_yMin1 + (grid_enum::magnetic_y)*n_cells])
             / dy)
          + (( dev_conserved[id       + (grid_enum::magnetic_z)*n_cells]
             - dev_conserved[id_zMin1 + (grid_enum::magnetic_z)*n_cells])
             / dz);

        maxDivergence = max(maxDivergence, fabs(cellDivergence));
      }
    }

    // Perform reduction across the entire grid
    reduction_utilities::gridReduceMax(maxDivergence, dev_maxDivergence);
  }
  // =========================================================================

// =============================================================================
Real checkMagneticDivergence(Grid3D const &G)
{
  // Compute the local value of the divergence
  // First let's create some variables we'll need.
  cuda_utilities::AutomaticLaunchParams static const launchParams(mhd::calculateMagneticDivergence);
  // NOTE(review): the template argument was lost in this paste and is
  // reconstructed as <Real> — confirm against the original.
  cuda_utilities::DeviceVector<Real> static dev_maxDivergence(1);

  // Set the device side divergence to the smallest possible double so that
  // the reduction isn't using the maximum value of the previous iteration
  dev_maxDivergence.assign(std::numeric_limits<Real>::lowest());

  // Now lets get the local maximum divergence
  hipLaunchKernelGGL(mhd::calculateMagneticDivergence,
                     launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0,
                     G.C.device, dev_maxDivergence.data(),
                     G.H.dx, G.H.dy, G.H.dz,
                     G.H.nx, G.H.ny, G.H.nz,
                     G.H.n_cells);
  CudaCheckError();
  Real max_magnetic_divergence = dev_maxDivergence[0];

  #ifdef MPI_CHOLLA
  // Now that we have the local maximum let's get the global maximum
  max_magnetic_divergence = ReduceRealMax(max_magnetic_divergence);
  #endif  //MPI_CHOLLA

  // If the magnetic divergence is greater than the limit then raise a warning and exit
  Real static const magnetic_divergence_limit = 1.0E-14;
  if (max_magnetic_divergence > magnetic_divergence_limit)
  {
    // Report the error and exit
    chprintf("The magnetic divergence has exceeded the maximum allowed value. Divergence = %7.4e, the maximum allowed divergence = %7.4e\n", max_magnetic_divergence, magnetic_divergence_limit);
    chexit(-1);
  }
  // NOTE(review): the kernel reduces fabs() values starting from a per-thread
  // 0.0, so this branch appears defensive/unreachable in practice — confirm.
  else if (max_magnetic_divergence < 0.0)
  {
    // Report the error and exit
    chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", max_magnetic_divergence);
    chexit(-1);
  }
  else  // The magnetic divergence is within acceptable bounds
  {
    chprintf("Global maximum magnetic divergence = %7.4e\n", max_magnetic_divergence);
  }

  return max_magnetic_divergence;
}
// =============================================================================
}  // end namespace mhd
#endif  // MHD

/*!
 * \file magnetic_divergence.h
 * \author Robert 'Bob' Caddy (rvc@pitt.edu)
 * \brief Contains the declaration for the functions that compute the magnetic
 * divergence
 *
 */

#pragma once

// STL Includes

// External Includes

// Local Includes
#include "../global/global.h"
#include "../grid/grid3D.h"
#include "../global/global_cuda.h"
#include "../utils/gpu.hpp"


/*!
 * \brief Namespace for MHD code
 *
 */
namespace mhd
{
  // =========================================================================
  /*!
   * \brief Kernel to compute the maximum divergence of the magnetic field in
   * the grid. Uses `reduction_utilities::gridReduceMax` and as such should be
   * called with the minimum number of blocks.
   * Recommend using the occupancy
   * API
   *
   * \param[in] dev_conserved The device array of conserved variables
   * \param[out] maxDivergence The device scalar to store the reduced divergence at
   * \param[in] dx Cell size in the X-direction
   * \param[in] dy Cell size in the Y-direction
   * \param[in] dz Cell size in the Z-direction
   * \param[in] nx Number of cells in the X-direction
   * \param[in] ny Number of cells in the Y-direction
   * \param[in] nz Number of cells in the Z-direction
   * \param[in] n_cells Total number of cells
   */
  __global__ void calculateMagneticDivergence(Real const *dev_conserved,
                                              Real *maxDivergence,
                                              Real const dx,
                                              Real const dy,
                                              Real const dz,
                                              int const nx,
                                              int const ny,
                                              int const nz,
                                              int const n_cells);
  // =========================================================================

  // =========================================================================
  /*!
   * \brief Compute the maximum magnetic divergence in the grid and report
   * an error if it exceeds the magnetic divergence limit or is negative. The
   * magnetic divergence limit is 1E-14 as determined by Athena as a
   * reasonable upper bound for correctness.
   *
   * \param G The grid object
   * \return Real The maximum magnetic divergence found in the grid. Can
   * usually be ignored since all checking is done in the function, mostly
   * this return is for testing.
   */
  Real checkMagneticDivergence(Grid3D const &G);
  // =========================================================================
}  // end namespace mhd

/*!
 * \file magnetic_divergence_tests.cu
 * \author Robert 'Bob' Caddy (rvc@pitt.edu)
 * \brief Tests for the magnetic divergence code
 *
 */


// STL Includes
// NOTE(review): the bracketed header names were lost in this paste; the
// standard-library names used below require at least these headers — confirm
// against the original file.
#include <vector>
#include <string>
#include <random>
#include <numeric>

// External Includes
#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers

// Local Includes
#include "../utils/testing_utilities.h"
#include "../mhd/magnetic_divergence.h"
#include "../utils/DeviceVector.h"
#include "../global/global.h"

#ifdef MHD
// =============================================================================
// Tests for the magnetic field divergence functions
// =============================================================================
TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput)
{
  // Grid Parameters & testing parameters
  size_t const gridSize = 96;  // Needs to be at least 64 so that each thread has a value
  size_t const n_ghost  = 4;

  // Instantiate Grid3D object
  Grid3D G;
  G.H.dx = 3;
  G.H.dy = G.H.dx;
  G.H.dz = G.H.dx;
  G.H.nx = gridSize+2*n_ghost;
  G.H.ny = G.H.nx;
  G.H.nz = G.H.nx;
  G.H.n_cells  = G.H.nx * G.H.ny * G.H.nz;
  G.H.n_fields = 8;

  // Setup host grid. Fill host grid with small pseudo-random values; the
  // fixed seed makes the grid, and therefore the divergence, deterministic
  // NOTE(review): the template arguments below were lost in this paste and
  // are reconstructed as <double> — confirm against the original.
  std::vector<double> host_grid(G.H.n_cells * G.H.n_fields);
  std::mt19937 prng(1);
  std::uniform_real_distribution<double> doubleRand(1, 5);
  for (size_t i = 0; i < host_grid.size(); i++)
  {
    host_grid.at(i) = doubleRand(prng) / 1E15;
  }

  // Allocating and copying to device
  cuda_utilities::DeviceVector<double> dev_grid(host_grid.size());
  G.C.device = dev_grid.data();
  dev_grid.cpyHostToDevice(host_grid);

  // Perform test
  // NOTE(review): calls MPI_Finalize directly rather than a Cholla wrapper —
  // confirm this matches how other tests in the suite tear down MPI.
  InitializeChollaMPI(NULL, NULL);
  double max_magnetic_divergence = mhd::checkMagneticDivergence(G);
  MPI_Finalize();
  // Perform Comparison
  Real const fiducialDivergence = 3.6318132783263106 / 1E15;
  testingUtilities::checkResults(fiducialDivergence, max_magnetic_divergence, "maximum divergence");
}
// =============================================================================
// End of tests for the magnetic field divergence functions
// =============================================================================
#endif  // MHD

/*!
 * \file magnetic_update.cu
 * \author Robert 'Bob' Caddy (rvc@pitt.edu)
 * \brief Contains the definition of the kernel to update the magnetic field.
 * Method from Stone & Gardiner 2009 "A simple unsplit Godunov method for
 * multidimensional MHD" hereafter referred to as "S&G 2009"
 *
 */

// STL Includes

// External Includes

// Local Includes
#include "../mhd/magnetic_update.h"
#include "../utils/cuda_utilities.h"
#ifdef MHD
namespace mhd
{
  // =========================================================================
  __global__ void Update_Magnetic_Field_3D(Real *sourceGrid,
                                           Real *destinationGrid,
                                           Real *ctElectricFields,
                                           int const nx,
                                           int const ny,
                                           int const nz,
                                           int const n_cells,
                                           Real const dt,
                                           Real const dx,
                                           Real const dy,
                                           Real const dz)
  {
    // get a thread index
    int const blockId  = blockIdx.x + blockIdx.y*gridDim.x;
    int const threadId = threadIdx.x + blockId * blockDim.x;
    int xid, yid, zid;
    cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid);

    // Thread guard to avoid overrun and to skip ghost cells that cannot be
    // evolved due to missing electric fields that can't be reconstructed
    // (the +1 offsets below read one cell beyond (xid, yid, zid))
    if (     xid < nx-2
         and yid < ny-2
         and zid < nz-2)
    {
      // Compute the three dt/dx quantities
      Real const dtodx = dt/dx;
      Real const dtody = dt/dy;
      Real const dtodz = dt/dz;

      // Load the various edge electric fields required. The '1' and '2'
      // fields are not shared and the '3' fields are shared by two of the
      // updates. The EMF components are stored in three n_cells-sized
      // planes: x at offset 0, y at offset n_cells, z at offset 2*n_cells.
      Real electric_x_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid  , yid+1, zid  , nx, ny))];
      Real electric_x_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid  , yid  , zid+1, nx, ny))];
      Real electric_x_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid  , yid+1, zid+1, nx, ny))];
      Real electric_y_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid  , zid  , nx, ny)) + n_cells];
      Real electric_y_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid  , yid  , zid+1, nx, ny)) + n_cells];
      Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid  , zid+1, nx, ny)) + n_cells];
      Real electric_z_1 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid  , zid  , nx, ny)) + 2 * n_cells];
      Real electric_z_2 = ctElectricFields[(cuda_utilities::compute1DIndex(xid  , yid+1, zid  , nx, ny)) + 2 * n_cells];
      Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid+1, yid+1, zid  , nx, ny)) + 2 * n_cells];

      // Perform Updates

      // X field update
      // S&G 2009 equation 10
      destinationGrid[threadId + (grid_enum::magnetic_x)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_x)*n_cells]
          + dtodz * (electric_y_3 - electric_y_1)
          + dtody * (electric_z_1 - electric_z_3);

      // Y field update
      // S&G 2009 equation 11
      destinationGrid[threadId + (grid_enum::magnetic_y)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_y)*n_cells]
          + dtodx * (electric_z_3 - electric_z_2)
          + dtodz * (electric_x_1 - electric_x_3);

      // Z field update
      // S&G 2009 equation 12
      destinationGrid[threadId + (grid_enum::magnetic_z)*n_cells] = sourceGrid[threadId + (grid_enum::magnetic_z)*n_cells]
          + dtody * (electric_x_3 - electric_x_2)
          + dtodx * (electric_y_2 - electric_y_3);
    }
  }
  // =========================================================================
}  // end namespace mhd
#endif  // MHD
/*!
 * \file magnetic_update.h
 * \author Robert 'Bob' Caddy (rvc@pitt.edu)
 * \brief Contains the declaration of the kernel to update the magnetic field.
 * Method from Stone & Gardiner 2009 "A simple unsplit Godunov method for
 * multidimensional MHD" hereafter referred to as "S&G 2009"
 *
 */

#pragma once

// STL Includes

// External Includes

// Local Includes
#include "../global/global.h"
#include "../global/global_cuda.h"
#include "../utils/gpu.hpp"

/*!
 * \brief Namespace for MHD code
 *
 */
namespace mhd
{
  // =========================================================================
  /*!
   * \brief Update the magnetic field using the CT electric fields
   *
   * \param[in] sourceGrid The array which holds the old values of the
   * magnetic field
   * \param[out] destinationGrid The array to hold the updated values of the
   * magnetic field
   * \param[in] ctElectricFields The array of constrained transport electric
   * fields
   * \param[in] nx The number of cells in the x-direction
   * \param[in] ny The number of cells in the y-direction
   * \param[in] nz The number of cells in the z-direction
   * \param[in] n_cells The total number of cells
   * \param[in] dt The time step. If doing the half time step update make sure
   * to divide it by two when passing the time step to this kernel
   * \param[in] dx The size of each cell in the x-direction
   * \param[in] dy The size of each cell in the y-direction
   * \param[in] dz The size of each cell in the z-direction
   */
  __global__ void Update_Magnetic_Field_3D(Real *sourceGrid,
                                           Real *destinationGrid,
                                           Real *ctElectricFields,
                                           int const nx,
                                           int const ny,
                                           int const nz,
                                           int const n_cells,
                                           Real const dt,
                                           Real const dx,
                                           Real const dy,
                                           Real const dz);
  // =========================================================================
}  // end namespace mhd

/*!
 * \file magnetic_update_tests.cu
 * \author Robert 'Bob' Caddy (rvc@pitt.edu)
 * \brief Tests for the magnetic update code
 *
 */

// STL Includes
// NOTE(review): the bracketed header names were lost in this paste; the
// standard-library names used below require at least these headers — confirm
// against the original file.
#include <vector>
#include <string>
#include <numeric>
#include <iterator>

// External Includes
#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers

// Local Includes
#include "../utils/testing_utilities.h"
#include "../utils/cuda_utilities.h"
#include "../mhd/magnetic_update.h"

#ifdef MHD
// =============================================================================
/*!
 * \brief Test fixture for tMHDUpdateMagneticField3D test suite
 *
 */
class tMHDUpdateMagneticField3D : public ::testing::Test
{
public:

  /*!
   * \brief Initialize and allocate all the various required variables and
   * arrays
   *
   */
  tMHDUpdateMagneticField3D()
      :
      nx(3),
      ny(nx),
      nz(nx),
      n_cells(nx*ny*nz),
      dt(3.2),
      dx(2.5),
      dy(2.5),
      dz(2.5),
      sourceGrid      (n_cells * (grid_enum::num_fields)),
      destinationGrid (n_cells * (grid_enum::num_fields), -999.),
      ctElectricFields(n_cells * 3),
      fiducialData    (n_cells * (grid_enum::num_fields), -999.),
      dimGrid((n_cells + TPB - 1),1,1),
      dimBlock(TPB,1,1)
  {
    // Allocate device arrays
    CudaSafeCall ( cudaMalloc(&dev_sourceGrid,       sourceGrid.size()*sizeof(double)) );
    CudaSafeCall ( cudaMalloc(&dev_destinationGrid,  destinationGrid.size()*sizeof(double)) );
    CudaSafeCall ( cudaMalloc(&dev_ctElectricFields, ctElectricFields.size()*sizeof(double)) );

    // Populate the grids with values where vector.at(i) = double(i). The
    // values chosen aren't that important, just that every cell has a unique
    // value
    std::iota(std::begin(sourceGrid), std::end(sourceGrid), 0.);
    std::iota(std::begin(ctElectricFields), std::end(ctElectricFields), sourceGrid.back() + 1);
  }
  ~tMHDUpdateMagneticField3D() = default;
protected:
  // Initialize the test grid and other state variables
  size_t const nx, ny, nz;
  size_t const n_cells;
  Real const dt, dx, dy, dz;

  // Launch Parameters
  dim3 const dimGrid;   // How many blocks in the grid
  dim3 const dimBlock;  // How many threads per block

  // Make sure the vector is large enough that the locations where the
  // magnetic field would be in the real grid are filled
  // NOTE(review): the element type <double> was lost in this paste and is
  // reconstructed from the sizeof(double) allocations above — confirm.
  std::vector<double> sourceGrid;
  std::vector<double> destinationGrid;
  std::vector<double> ctElectricFields;
  std::vector<double> fiducialData;

  // device pointers
  // NOTE(review): dev_fiducialData is declared but never allocated or used.
  double *dev_sourceGrid, *dev_destinationGrid, *dev_ctElectricFields, *dev_fiducialData;

  /*!
   * \brief Launch the kernel and check results
   *
   */
  void runTest()
  {
    // Copy values to GPU
    CudaSafeCall( cudaMemcpy(dev_sourceGrid,       sourceGrid.data(),       sourceGrid.size()*sizeof(Real),       cudaMemcpyHostToDevice) );
    CudaSafeCall( cudaMemcpy(dev_destinationGrid,  destinationGrid.data(),  destinationGrid.size()*sizeof(Real),  cudaMemcpyHostToDevice) );
    CudaSafeCall( cudaMemcpy(dev_ctElectricFields, ctElectricFields.data(), ctElectricFields.size()*sizeof(Real), cudaMemcpyHostToDevice) );

    // Call the kernel to test
    hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D,
                       dimGrid,
                       dimBlock,
                       0,
                       0,
                       dev_sourceGrid,
                       dev_destinationGrid,
                       dev_ctElectricFields,
                       nx,
                       ny,
                       nz,
                       n_cells,
                       dt,
                       dx,
                       dy,
                       dz);
    CudaCheckError();

    // Copy test data back
    CudaSafeCall( cudaMemcpy(destinationGrid.data(),
                             dev_destinationGrid,
                             destinationGrid.size()*sizeof(Real),
                             cudaMemcpyDeviceToHost) );
    cudaDeviceSynchronize();

    // Check the results
    for (size_t i = 0; i < fiducialData.size(); i++)
    {
      int xid, yid, zid;
      cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid);
      testingUtilities::checkResults(fiducialData.at(i),
                                     destinationGrid.at(i),
                                     "value at i = " + std::to_string(i)
                                     + ", xid = "    + std::to_string(xid)
                                     + ", yid = "    + std::to_string(yid)
                                     + ", zid = "    + std::to_string(zid));
    }
  }
};
// =============================================================================

// =============================================================================
TEST_F(tMHDUpdateMagneticField3D,
       CorrectInputExpectCorrectOutput)
{
  // Fiducial values
  fiducialData.at(135) = 142.68000000000001;
  fiducialData.at(162) = 151.75999999999999;
  fiducialData.at(189) = 191.56;

  // Launch kernel and check results
  runTest();
}
// =============================================================================
#endif  // MHD
--- a/src/reconstruction/pcm_cuda.cu +++ b/src/reconstruction/pcm_cuda.cu @@ -7,7 +7,8 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../reconstruction/pcm_cuda.h" - +#include "../utils/mhd_utilities.h" +#include "../utils/cuda_utilities.h" __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, int n_ghost, Real gamma, int n_fields) { @@ -18,11 +19,11 @@ __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, R #ifdef DE Real ge; - #endif + #endif //DE #ifdef SCALAR Real scalar[NSCALARS]; - #endif + #endif //SCALAR // get a global thread ID int xid = threadIdx.x + blockIdx.x*blockDim.x; @@ -43,10 +44,10 @@ __global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, R for (int i=0; i 0) + { + id = cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny); + dev_bounds_Rx[ id] = d; + dev_bounds_Rx[ n_cells + id] = mx; + dev_bounds_Rx[2*n_cells + id] = my; + dev_bounds_Rx[3*n_cells + id] = mz; + dev_bounds_Rx[4*n_cells + id] = E; + #ifdef SCALAR + for (int i=0; i 0) + { + // Send the y-1/2 Right interface + id = cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny); + dev_bounds_Ry[ id] = d; + dev_bounds_Ry[ n_cells + id] = mx; + dev_bounds_Ry[2*n_cells + id] = my; + dev_bounds_Ry[3*n_cells + id] = mz; + dev_bounds_Ry[4*n_cells + id] = E; + #ifdef SCALAR + for (int i=0; i 0) + { + // Send the z-1/2 Right interface + id = cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny); + dev_bounds_Rz[ id] = d; + dev_bounds_Rz[ n_cells + id] = mx; + dev_bounds_Rz[2*n_cells + id] = my; + dev_bounds_Rz[3*n_cells + id] = mz; + dev_bounds_Rz[4*n_cells + id] = E; + #ifdef SCALAR + for (int i=0; i 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } else { del_ge_G = 0.0; } - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } else { 
del_scalar_G[i] = 0.0; } } - #endif + #endif //SCALAR // Project the left, right, centered and van Leer differences onto the characteristic variables @@ -316,7 +316,7 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0*lim_slope_a, lim_slope_b); } - #endif + #endif //DE #ifdef SCALAR for (int i=0; i= 0) { lamdiff = lambda_p - lambda_m; @@ -498,12 +498,12 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou sum_3 += lamdiff * del_vz_m_i; #ifdef DE sum_ge += lamdiff * del_ge_m_i; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i= 0) { @@ -522,24 +522,24 @@ __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou p_L_iph += 0.5*dtodx*sum_4; #ifdef DE ge_L_iph += 0.5*dtodx*sum_ge; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } else { del_ge_G = 0.0; } - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } else { del_scalar_G[i] = 0.0; } } - #endif + #endif //SCALAR // Step 3 - Project the left, right, centered and van Leer differences onto the characteristic variables @@ -372,7 +372,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_ge_m_imo = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); } else del_ge_m_imo = 0.0; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { @@ -382,7 +382,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else del_scalar_m_imo[i] = 0.0; } - #endif + #endif //SCALAR // Step 5 - Project the monotonized difference in the characteristic variables back onto the @@ -442,7 +442,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_ge_C = 0.5*(ge_ipo - ge_imo); if 
(del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } else { del_ge_G = 0.0; } - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } else { del_scalar_G[i] = 0.0; } } - #endif + #endif //SCALAR // Step 3 - Project the left, right, centered, and van Leer differences onto the characteristic variables // Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation) @@ -520,7 +520,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_ge_m_i = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); } else del_ge_m_i = 0.0; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { @@ -530,7 +530,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else del_scalar_m_i[i] = 0.0; } - #endif + #endif //SCALAR // Step 5 - Project the monotonized difference in the characteristic variables back onto the @@ -591,7 +591,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_ge_C = 0.5*(ge_ipt- ge_i); if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } else { del_ge_G = 0.0; } - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } else { del_scalar_G[i] = 0.0; } } - #endif + #endif //SCALAR // Step 3 - Project the left, right, centered, and van Leer differences onto the characteristic variables @@ -670,7 +670,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou del_ge_m_ipo = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); } else del_ge_m_ipo = 0.0; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i 0.0) { @@ -680,7 +680,7 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou } else 
del_scalar_m_ipo[i] = 0.0; } - #endif + #endif //SCALAR // Step 5 - Project the monotonized difference in the characteristic variables back onto the @@ -713,13 +713,13 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou #ifdef DE ge_L = 0.5*(ge_i + ge_imo) - (del_ge_m_i - del_ge_m_imo) / 6.0; ge_R = 0.5*(ge_ipo + ge_i) - (del_ge_m_ipo - del_ge_m_i) / 6.0; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i= 0) { @@ -909,24 +909,24 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou chi_5 = A*(del_p_m_i - p_6) + B*p_6; #ifdef DE chi_ge = A*(del_ge_m_i - ge_6) + B*ge_6; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i= 0) { @@ -952,12 +952,12 @@ __global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou p_R += sum_5; #ifdef DE ge_R += sum_ge; - #endif + #endif //DE #ifdef SCALAR for (int i=0; i= 0.0) { - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityFluxL, momentumFluxXL, momentumFluxYL, momentumFluxZL, @@ -263,13 +266,13 @@ // Right state Real densityFluxR, momentumFluxXR, momentumFluxYR, momentumFluxZR, magneticFluxYR, magneticFluxZR, energyFluxR; - _hlldInternal::_nonStarFluxes(momentumXR, + mhd::_internal::_nonStarFluxes(momentumXR, velocityXR, velocityYR, velocityZR, totalPressureR, energyR, - magneticXR, + magneticX, magneticYR, magneticZR, densityFluxR, @@ -282,9 +285,10 @@ // If we're in the R state then assign fluxes and return. 
// In this state the flow is supersonic + // M&K 2005 equation 66 if (speedR <= 0.0) { - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityFluxR, momentumFluxXR, momentumFluxYR, momentumFluxZR, @@ -307,6 +311,8 @@ // ================================================================= // Shared quantity // note that velocityStarX = speedM + // M&K 2005 equation 23, might need to switch to eqn. 41 in the + // future though they should produce identical results Real totalPressureStar = totalPressureL + densityL * (speedL - velocityXL) * (speedM - velocityXL); @@ -317,7 +323,7 @@ densityStarFluxL, momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, magneticStarFluxYL, magneticStarFluxZL, energyStarFluxL; - _hlldInternal::_starFluxes(speedM, + mhd::_internal::_starFluxes(speedM, speedL, densityL, velocityXL, @@ -328,7 +334,7 @@ momentumZL, energyL, totalPressureL, - magneticXL, + magneticX, magneticYL, magneticZL, densityStarL, @@ -355,9 +361,10 @@ // If we're in the L* state then assign fluxes and return. // In this state the flow is subsonic + // M&K 2005 equation 66 if (speedStarL >= 0.0) { - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxL, momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, @@ -381,7 +388,7 @@ densityStarFluxR, momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR, magneticStarFluxYR, magneticStarFluxZR, energyStarFluxR; - _hlldInternal::_starFluxes(speedM, + mhd::_internal::_starFluxes(speedM, speedR, densityR, velocityXR, @@ -392,7 +399,7 @@ momentumZR, energyR, totalPressureR, - magneticXR, + magneticX, magneticYR, magneticZR, densityStarR, @@ -419,9 +426,10 @@ // If we're in the R* state then assign fluxes and return. 
// In this state the flow is subsonic + // M&K 2005 equation 66 if (speedStarR <= 0.0) { - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxR, momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR, @@ -445,8 +453,8 @@ Real velocityDoubleStarY, velocityDoubleStarZ, magneticDoubleStarY, magneticDoubleStarZ, energyDoubleStarL, energyDoubleStarR; - _hlldInternal::_doubleStarState(speedM, - magneticXL, + mhd::_internal::_doubleStarState(speedM, + magneticX, totalPressureStar, densityStarL, velocityStarYL, @@ -468,12 +476,13 @@ energyDoubleStarR); // Compute and return L** fluxes + // M&K 2005 equation 66 if (speedM >= 0.0) { Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, energyDoubleStarFlux, magneticDoubleStarFluxY, magneticDoubleStarFluxZ; - _hlldInternal::_doubleStarFluxes(speedStarL, + mhd::_internal::_doubleStarFluxes(speedStarL, momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, @@ -500,7 +509,7 @@ magneticDoubleStarFluxY, magneticDoubleStarFluxZ); - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxL, momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, @@ -520,12 +529,13 @@ return; } // Compute and return R** fluxes + // M&K 2005 equation 66 else if (speedStarR >= 0.0) { Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, energyDoubleStarFlux, magneticDoubleStarFluxY, magneticDoubleStarFluxZ; - _hlldInternal::_doubleStarFluxes(speedStarR, + mhd::_internal::_doubleStarFluxes(speedStarR, momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR, @@ -552,7 +562,7 @@ magneticDoubleStarFluxY, magneticDoubleStarFluxZ); - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, densityStarFluxR, 
momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, @@ -575,7 +585,7 @@ }; // ========================================================================= - namespace _hlldInternal + namespace _internal { // ===================================================================== __device__ __host__ void _approximateWaveSpeeds(Real const &densityL, @@ -587,7 +597,7 @@ Real const &velocityZL, Real const &gasPressureL, Real const &totalPressureL, - Real const &magneticXL, + Real const &magneticX, Real const &magneticYL, Real const &magneticZL, Real const &densityR, @@ -599,7 +609,6 @@ Real const &velocityZR, Real const &gasPressureR, Real const &totalPressureR, - Real const &magneticXR, Real const &magneticYR, Real const &magneticZR, Real const &gamma, @@ -612,26 +621,28 @@ Real &densityStarR) { // Get the fast magnetosonic wave speeds - Real magSonicL = mhdUtils::fastMagnetosonicSpeed(densityL, + Real magSonicL = mhd::utils::fastMagnetosonicSpeed(densityL, gasPressureL, - magneticXL, + magneticX, magneticYL, magneticZL, gamma); - Real magSonicR = mhdUtils::fastMagnetosonicSpeed(densityR, + Real magSonicR = mhd::utils::fastMagnetosonicSpeed(densityR, gasPressureR, - magneticXR, + magneticX, magneticYR, magneticZR, gamma); // Compute the S_L and S_R wave speeds. 
// Version suggested by Miyoshi & Kusano 2005 and used in Athena + // M&K 2005 equation 67 Real magSonicMax = fmax(magSonicL, magSonicR); speedL = fmin(velocityXL, velocityXR) - magSonicMax; speedR = fmax(velocityXL, velocityXR) + magSonicMax; // Compute the S_M wave speed + // M&K 2005 equation 38 speedM = // Numerator ( momentumXR * (speedR - velocityXR) - momentumXL * (speedL - velocityXL) @@ -642,12 +653,14 @@ - densityL * (speedL - velocityXL)); // Compute the densities in the star state + // M&K 2005 equation 43 densityStarL = densityL * (speedL - velocityXL) / (speedL - speedM); densityStarR = densityR * (speedR - velocityXR) / (speedR - speedM); // Compute the S_L^* and S_R^* wave speeds - speedStarL = speedM - mhdUtils::alfvenSpeed(magneticXL, densityStarL); - speedStarR = speedM + mhdUtils::alfvenSpeed(magneticXR, densityStarR); + // M&K 2005 equation 51 + speedStarL = speedM - mhd::utils::alfvenSpeed(magneticX, densityStarL); + speedStarR = speedM + mhd::utils::alfvenSpeed(magneticX, densityStarR); } // ===================================================================== @@ -669,6 +682,7 @@ Real &magneticFluxZ, Real &energyFlux) { + // M&K 2005 equation 2 densityFlux = momentumX; momentumFluxX = momentumX * velocityX + totalPressure - magneticX * magneticX; @@ -701,13 +715,13 @@ Real const &magneticFluxY, Real const &magneticFluxZ) { - dev_flux[threadId] = densityFlux; - dev_flux[threadId + n_cells * o1] = momentumFluxX; - dev_flux[threadId + n_cells * o2] = momentumFluxY; - dev_flux[threadId + n_cells * o3] = momentumFluxZ; - dev_flux[threadId + n_cells * 4] = energyFlux; - dev_flux[threadId + n_cells * (o2 + 4 + NSCALARS)] = magneticFluxY; - dev_flux[threadId + n_cells * (o3 + 4 + NSCALARS)] = magneticFluxZ; + dev_flux[threadId] = densityFlux; + dev_flux[threadId + n_cells * o1] = momentumFluxX; + dev_flux[threadId + n_cells * o2] = momentumFluxY; + dev_flux[threadId + n_cells * o3] = momentumFluxZ; + dev_flux[threadId + n_cells * 4] = energyFlux; + 
dev_flux[threadId + n_cells * (grid_enum::fluxX_magnetic_z)] = magneticFluxY; + dev_flux[threadId + n_cells * (grid_enum::fluxX_magnetic_y)] = magneticFluxZ; } // ===================================================================== @@ -749,10 +763,11 @@ Real &magneticStarFluxZ) { // Check for and handle the degenerate case + // Explained at the top of page 326 in M&K 2005 if (fabs(density * (speedSide - velocityX) * (speedSide - speedM) - (magneticX * magneticX)) - < totalPressureStar * _hlldInternal::_hlldSmallNumber) + < totalPressureStar * mhd::_internal::_hlldSmallNumber) { velocityStarY = velocityY; velocityStarZ = velocityZ; @@ -761,30 +776,35 @@ } else { + // Denominator for M&K 2005 equations 44-47 Real const denom = density * (speedSide - velocityX) * (speedSide - speedM) - (magneticX * magneticX); // Compute the velocity and magnetic field in the star state + // M&K 2005 equations 44 & 46 Real coef = magneticX * (speedM - velocityX) / denom; velocityStarY = velocityY - magneticY * coef; velocityStarZ = velocityZ - magneticZ * coef; + // M&K 2005 equations 45 & 47 Real tmpPower = (speedSide - velocityX); - tmpPower = tmpPower * tmpPower; - coef = (density * tmpPower - (magneticX * magneticX)) / denom; + tmpPower = tmpPower * tmpPower; + coef = (density * tmpPower - (magneticX * magneticX)) / denom; magneticStarY = magneticY * coef; magneticStarZ = magneticZ * coef; } + // M&K 2005 equation 48 energyStar = ( energy * (speedSide - velocityX) - totalPressure * velocityX + totalPressureStar * speedM - + magneticX * (_hlldInternal::_dotProduct(velocityX, velocityY, velocityZ, magneticX, magneticY, magneticZ) - - _hlldInternal::_dotProduct(speedM, velocityStarY, velocityStarZ, magneticX, magneticStarY, magneticStarZ))) + + magneticX * (math_utils::dotProduct(velocityX, velocityY, velocityZ, magneticX, magneticY, magneticZ) + - math_utils::dotProduct(speedM, velocityStarY, velocityStarZ, magneticX, magneticStarY, magneticStarZ))) / (speedSide - speedM); // Now 
compute the star state fluxes + // M&K 2005 equations 64 densityStarFlux = densityFlux + speedSide * (densityStar - density);; momentumStarFluxX = momentumFluxX + speedSide * (densityStar * speedM - momentumX);; momentumStarFluxY = momentumFluxY + speedSide * (densityStar * velocityStarY - momentumY);; @@ -819,14 +839,16 @@ Real &energyDoubleStarR) { // if Bx is zero then just return the star state - if (magneticX < _hlldInternal::_hlldSmallNumber * totalPressureStar) + // Explained at the top of page 328 in M&K 2005. Essentially when + // magneticX is 0 this reduces to the HLLC solver + if (magneticX < mhd::_internal::_hlldSmallNumber * totalPressureStar) { velocityDoubleStarY = velocityStarYL; velocityDoubleStarZ = velocityStarZL; magneticDoubleStarY = magneticStarYL; magneticDoubleStarZ = magneticStarZL; - energyDoubleStarL = energyStarL; - energyDoubleStarR = energyStarR; + energyDoubleStarL = energyStarL; + energyDoubleStarR = energyStarR; } else { @@ -840,6 +862,7 @@ // and magnetic fields along with the energy // Double Star velocities + // M&K 2005 equations 59 & 60 velocityDoubleStarY = inverseDensities * (sqrtDL * velocityStarYL + sqrtDR * velocityStarYR + magXSign * (magneticStarYR - magneticStarYL)); @@ -848,6 +871,7 @@ + magXSign * (magneticStarZR - magneticStarZL)); // Double star magnetic fields + // M&K 2005 equations 61 & 62 magneticDoubleStarY = inverseDensities * (sqrtDL * magneticStarYR + sqrtDR * magneticStarYL + magXSign * (sqrtDL * sqrtDR) * (velocityStarYR - velocityStarYL)); @@ -856,17 +880,18 @@ + magXSign * (sqrtDL * sqrtDR) * (velocityStarZR - velocityStarZL)); // Double star energy - Real velDblStarDotMagDblStar = _hlldInternal::_dotProduct(speedM, + Real velDblStarDotMagDblStar = math_utils::dotProduct(speedM, velocityDoubleStarY, velocityDoubleStarZ, magneticX, magneticDoubleStarY, magneticDoubleStarZ); + // M&K 2005 equation 63 energyDoubleStarL = energyStarL - sqrtDL * magXSign - * (_hlldInternal::_dotProduct(speedM, velocityStarYL, 
velocityStarZL, magneticX, magneticStarYL, magneticStarZL) + * (math_utils::dotProduct(speedM, velocityStarYL, velocityStarZL, magneticX, magneticStarYL, magneticStarZL) - velDblStarDotMagDblStar); energyDoubleStarR = energyStarR + sqrtDR * magXSign - * (_hlldInternal::_dotProduct(speedM, velocityStarYR, velocityStarZR, magneticX, magneticStarYR, magneticStarZR) + * (math_utils::dotProduct(speedM, velocityStarYR, velocityStarZR, magneticX, magneticStarYR, magneticStarZR) - velDblStarDotMagDblStar); } } @@ -900,6 +925,7 @@ Real &magneticDoubleStarFluxY, Real &magneticDoubleStarFluxZ) { + // M&K 2005 equation 65 momentumDoubleStarFluxX = momentumStarFluxX + speedStarSide * (velocityDoubleStarX - velocityStarX) * densityStar; momentumDoubleStarFluxY = momentumStarFluxY + speedStarSide * (velocityDoubleStarY - velocityStarY) * densityStar; momentumDoubleStarFluxZ = momentumStarFluxZ + speedStarSide * (velocityDoubleStarZ - velocityStarZ) * densityStar; @@ -909,7 +935,7 @@ } // ===================================================================== - } // _hlldInternal namespace - - -#endif // CUDA \ No newline at end of file + } // mhd::_internal namespace +} // end namespace mhd +#endif // MHD +#endif // CUDA diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index d8d58dce1..332768f8a 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -1,7 +1,9 @@ /*! * \file hlld_cuda.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the declaration of the HLLD solver + * \brief Contains the declaration of the HLLD solver from Miyoshi & Kusano 2005 + * "A multi-state HLL approximate Riemann solver for ideal magnetohydrodynamics", + * hereafter referred to as M&K 2005 * */ @@ -13,23 +15,32 @@ #include "../global/global.h" #ifdef CUDA - +/*! + * \brief Namespace for MHD code + * + */ +namespace mhd +{ /*! 
* \brief Compute the HLLD fluxes from Miyoshi & Kusano 2005 * - * \param[in] dev_bounds_L - * \param[in] dev_bounds_R - * \param[out] dev_flux - * \param[in] nx - * \param[in] ny - * \param[in] nz - * \param[in] n_ghost - * \param[in] gamma - * \param[in] dir - * \param[in] n_fields + * \param[in] dev_bounds_L The interface states on the left side of the interface + * \param[in] dev_bounds_R The interface states on the right side of the interface + * \param[in] dev_magnetic_face A pointer to the begining of the conserved + * magnetic field array that is stored at the interface. I.e. for the + * X-direction solve this would be the begining of the X-direction fields + * \param[out] dev_flux The output flux + * \param[in] nx Number of cells in the X-direction + * \param[in] ny Number of cells in the Y-direction + * \param[in] nz Number of cells in the Z-direction + * \param[in] n_ghost Number of ghost cells on each side + * \param[in] gamma The adiabatic index + * \param[in] dir The direction that the solve is taking place in. 0=X, 1=Y, 2=Z + * \param[in] n_fields The total number of fields */ __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, + Real *dev_magnetic_face, Real *dev_flux, int nx, int ny, @@ -44,7 +55,7 @@ * solver * */ - namespace _hlldInternal + namespace _internal { /*! * \brief Used for some comparisons. Value was chosen to match what is @@ -54,7 +65,8 @@ /*! * \brief Compute the left, right, star, and middle wave speeds. Also - * returns the densities in the star states + * returns the densities in the star states. 
M&K 2005 equations 38, 43, + * 51, and 67 * * \param[in] densityL Density, left side * \param[in] momentumXL Momentum in the X-direction, left side @@ -65,7 +77,7 @@ * \param[in] velocityZL Velocity in the Z-direction, left side * \param[in] gasPressureL Gas pressure, left side * \param[in] totalPressureL Total MHD pressure, left side - * \param[in] magneticXL Magnetic field in the X-direction, left side + * \param[in] magneticX Magnetic field in the X-direction, left side * \param[in] magneticYL Magnetic field in the Y-direction, left side * \param[in] magneticZL Magnetic field in the Z-direction, left side * \param[in] densityR Density, right side @@ -77,7 +89,6 @@ * \param[in] velocityZR Velocity in the Z-direction, right side * \param[in] gasPressureR Gas pressure, right side * \param[in] totalPressureR Total MHD pressure, right side - * \param[in] magneticXR Magnetic field in the X-direction, right side * \param[in] magneticYR Magnetic field in the Y-direction, right side * \param[in] magneticZR Magnetic field in the Z-direction, right side * \param[in] gamma Adiabatic index @@ -98,7 +109,7 @@ Real const &velocityZL, Real const &gasPressureL, Real const &totalPressureL, - Real const &magneticXL, + Real const &magneticX, Real const &magneticYL, Real const &magneticZL, Real const &densityR, @@ -110,7 +121,6 @@ Real const &velocityZR, Real const &gasPressureR, Real const &totalPressureR, - Real const &magneticXR, Real const &magneticYR, Real const &magneticZR, Real const &gamma, @@ -191,7 +201,8 @@ Real const &energyFlux); /*! - * \brief Compute the fluxes in the left or right star state + * \brief Compute the fluxes in the left or right star state. M&K 2005 + * equations 44-48, 64 * * \param[in] speedM Speed of the central wave * \param[in] speedSide Speed of the non-star wave on the side being computed @@ -267,27 +278,7 @@ Real &magneticStarFluxZ); /*! - * \brief Compute the dot product of a and b. 
- * - * \param[in] a1 The first element of a - * \param[in] a2 The second element of a - * \param[in] a3 The third element of a - * \param[in] b1 The first element of b - * \param[in] b2 The second element of b - * \param[in] b3 The third element of b - * - * \return Real The dot product of a and b - */ - inline __device__ __host__ Real _dotProduct(Real const &a1, - Real const &a2, - Real const &a3, - Real const &b1, - Real const &b2, - Real const &b3) - {return a1*b1 + ((a2*b2) + (a3*b3));}; - - /*! - * \brief Compute the double star state + * \brief Compute the double star state. M&K 2005 equations 59-63 * * \param[in] speedM * \param[in] magneticX @@ -334,7 +325,7 @@ Real &energyDoubleStarR); /*! - * \brief Compute the double star state fluxes + * \brief Compute the double star state fluxes. M&K 2005 equation 65 * * \param[in] speedStarSide The star speed on the side being computed * \param[in] momentumStarFluxX @@ -390,6 +381,6 @@ Real &magneticDoubleStarFluxY, Real &magneticDoubleStarFluxZ); - } // _hlldInternal namespace - + } // end namespace mhd::_internal +} // end namespace mhd #endif //CUDA diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 754c2dba0..0de90e6f9 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -16,12 +16,14 @@ // Local Includes #include "../global/global_cuda.h" +#include "../grid/grid_enum.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" #include "../utils/mhd_utilities.h" #include "../riemann_solvers/hlld_cuda.h" // Include code to test -#if defined(CUDA) && defined(HLLD) +#ifdef CUDA +#ifdef MHD // ========================================================================= // Integration tests for the entire HLLD solver. 
Unit tests are below // ========================================================================= @@ -59,15 +61,22 @@ int const &direction=0) { - // Rearrange X, Y, and Z values if a different direction is chosen - // besides default - stateLeft = _cycleXYZ(stateLeft, direction); - stateRight = _cycleXYZ(stateRight, direction); + // Rearrange X, Y, and Z values for the chosen direction + std::rotate(stateLeft.begin() + 1, stateLeft.begin() + 4 - direction, stateLeft.begin() + 4); + std::rotate(stateRight.begin()+ 1, stateRight.begin()+ 4 - direction, stateRight.begin()+ 4); + + // Create new vectors that store the values in the way that the HLLD + // solver expects + EXPECT_DOUBLE_EQ(stateLeft.at(grid_enum::magnetic_x), stateRight.at(grid_enum::magnetic_x)) + << "The left and right magnetic fields are not equal"; + std::vector const magneticX{stateLeft.at(grid_enum::magnetic_x)}; + stateLeft.erase(stateLeft.begin() + grid_enum::magnetic_x); + stateRight.erase(stateRight.begin() + grid_enum::magnetic_x); // Simulation Paramters - int const nx = 1; // Number of cells in the x-direction? - int const ny = 1; // Number of cells in the y-direction? - int const nz = 1; // Number of cells in the z-direction? 
+ int const nx = 1; // Number of cells in the x-direction + int const ny = 1; // Number of cells in the y-direction + int const nz = 1; // Number of cells in the z-direction int const nGhost = 0; // Isn't actually used it appears int nFields = 8; // Total number of conserved fields #ifdef SCALAR @@ -83,33 +92,40 @@ // Create the std::vector to store the fluxes and declare the device // pointers - std::vector testFlux(nFields); + std::vector testFlux(nFields-1, 0); Real *devConservedLeft; Real *devConservedRight; + Real *devConservedMagXFace; Real *devTestFlux; // Allocate device arrays and copy data - CudaSafeCall(cudaMalloc(&devConservedLeft, nFields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devConservedRight, nFields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devTestFlux, nFields*sizeof(Real))); + CudaSafeCall(cudaMalloc(&devConservedLeft, stateLeft.size()*sizeof(Real))); + CudaSafeCall(cudaMalloc(&devConservedRight, stateRight.size()*sizeof(Real))); + CudaSafeCall(cudaMalloc(&devConservedMagXFace, magneticX.size()*sizeof(Real))); + CudaSafeCall(cudaMalloc(&devTestFlux, testFlux.size()*sizeof(Real))); CudaSafeCall(cudaMemcpy(devConservedLeft, stateLeft.data(), - nFields*sizeof(Real), + stateLeft.size()*sizeof(Real), cudaMemcpyHostToDevice)); CudaSafeCall(cudaMemcpy(devConservedRight, stateRight.data(), - nFields*sizeof(Real), + stateRight.size()*sizeof(Real), + cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemcpy(devConservedMagXFace, + magneticX.data(), + magneticX.size()*sizeof(Real), cudaMemcpyHostToDevice)); // Run kernel - hipLaunchKernelGGL(Calculate_HLLD_Fluxes_CUDA, + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, devConservedLeft, // the "left" interface devConservedRight, // the "right" interface + devConservedMagXFace, // the magnetic field at the interface devTestFlux, nx, ny, @@ -122,13 +138,25 @@ CudaCheckError(); CudaSafeCall(cudaMemcpy(testFlux.data(), devTestFlux, - nFields*sizeof(Real), + 
testFlux.size()*sizeof(Real), cudaMemcpyDeviceToHost)); // Make sure to sync with the device so we have the results cudaDeviceSynchronize(); CudaCheckError(); + // Free device arrays + cudaFree(devConservedLeft); + cudaFree(devConservedRight); + cudaFree(devConservedMagXFace); + cudaFree(devTestFlux); + + // The HLLD solver only writes the the first two "slots" for + // magnetic flux so let's rearrange to make sure we have all the + // magnetic fluxes in the right spots + testFlux.insert(testFlux.begin() + grid_enum::magnetic_x, 0.0); + std::rotate(testFlux.begin() + 1, testFlux.begin() + 1 + direction, testFlux.begin() + 4); // Rotate momentum + return testFlux; } // ===================================================================== @@ -176,19 +204,15 @@ #endif //DE #ifdef SCALAR std::vector scalarNames{"Scalar 1", "Scalar 2", "Scalar 3"}; - fieldNames.insert(fieldNames.begin()+5, + fieldNames.insert(fieldNames.begin() + grid_enum::magnetic_start, scalarNames.begin(), - scalarNames.begin() + NSCALARS); + scalarNames.begin() + grid_enum::nscalars); - fiducialFlux.insert(fiducialFlux.begin()+5, + fiducialFlux.insert(fiducialFlux.begin() + grid_enum::magnetic_start, scalarFlux.begin(), - scalarFlux.begin() + NSCALARS); + scalarFlux.begin() + grid_enum::nscalars); #endif //SCALAR - // Rearrange X, Y, and Z values if a different direction is chosen - // besides default - fiducialFlux = _cycleXYZ(fiducialFlux, direction); - ASSERT_TRUE( (fiducialFlux.size() == testFlux.size()) and (fiducialFlux.size() == fieldNames.size())) << "The fiducial flux, test flux, and field name vectors are not all the same length" << std::endl @@ -242,7 +266,7 @@ output.at(1) = input.at(1) * input.at(0); // X Velocity to momentum output.at(2) = input.at(2) * input.at(0); // Y Velocity to momentum output.at(3) = input.at(3) * input.at(0); // Z Velocity to momentum - output.at(4) = mhdUtils::computeEnergy(input.at(4), + output.at(4) = mhd::utils::computeEnergy(input.at(4), input.at(0), 
input.at(1), input.at(2), @@ -261,19 +285,19 @@ primitiveScalars.end(), conservedScalar.begin(), [&](Real const &c){ return c*output.at(0); }); - output.insert(output.begin()+5, + output.insert(output.begin() + grid_enum::magnetic_start, conservedScalar.begin(), - conservedScalar.begin() + NSCALARS); + conservedScalar.begin() + grid_enum::nscalars); #endif //SCALAR #ifdef DE - output.push_back(mhdUtils::computeThermalEnergy(output.at(4), + output.push_back(mhd::utils::computeThermalEnergy(output.at(4), output.at(0), output.at(1), output.at(2), output.at(3), - output.at(5 + NSCALARS), - output.at(6 + NSCALARS), - output.at(7 + NSCALARS), + output.at(grid_enum::magnetic_x), + output.at(grid_enum::magnetic_y), + output.at(grid_enum::magnetic_z), gamma)); #endif //DE return output; @@ -294,52 +318,6 @@ } // ===================================================================== private: - // ===================================================================== - /*! - * \brief Cyclically permute the vector quantities in the list of - * conserved variables so that the same interfaces and fluxes can be - * used to test the HLLD solver in all 3 directions. - * - * \param[in,out] conservedVec The std::vector of conserved variables to - * be cyclically permutated - * \param[in] direction Which plane the interface is. 0 = plane normal - * to X, 1 = plane normal to Y, 2 = plane normal to Z - * - * \return std::vector The cyclically permutated list of conserved - * variables - */ - std::vector inline _cycleXYZ(std::vector conservedVec, - int const &direction) - { - switch (direction) - { - case 0: // Plane normal to X. 
Default case, do nothing - ; - break; - case 1: // Plane normal to Y - case 2: // Plane normal to Z - // Fall through for both Y and Z normal planes - { - size_t shift = 3 - direction; - auto momentumBegin = conservedVec.begin()+1; - auto magneticBegin = conservedVec.begin()+5; - #ifdef SCALAR - magneticBegin += NSCALARS; - #endif //SCALAR - - std::rotate(momentumBegin, momentumBegin+shift, momentumBegin+3); - std::rotate(magneticBegin, magneticBegin+shift, magneticBegin+3); - } - break; - default: - throw std::invalid_argument(("Invalid Value of `direction`" - " passed to `_cycleXYZ`. Value passed was " - + std::to_string(direction) + ", should be 0, 1, or 2.")); - break; - } - return conservedVec; - } - // ===================================================================== }; // ========================================================================= @@ -1525,6 +1503,61 @@ } // ========================================================================= + // ========================================================================= + /*! 
+ * \brief Test the HLLD Riemann Solver using the constant states from the + * examples in cholla/examples/3D + * + */ + TEST_F(tMHDCalculateHLLDFluxesCUDA, + ConstantStatesExpectCorrectFlux) + { + // Constant Values + Real const gamma = 5./3.; + + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; + + // States + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | + zeroMagneticField = primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, gamma, primitiveScalar), + onesMagneticField = primitive2Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, gamma, primitiveScalar); + + for (size_t direction = 2; direction < 3; direction++) + { + { + std::string const outputString {"Left State: Constant state, zero magnetic field\n" + "Right State: Constant state, zero magnetic field\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0,1.380658e-05,0,0,0,0,0,0}; + std::vector const scalarFlux{0,0,0}; + Real thermalEnergyFlux = 0.; + std::vector const testFluxes = computeFluxes(zeroMagneticField, + zeroMagneticField, + gamma, + direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString {"Left State: Constant state, ones magnetic field\n" + "Right State: Constant state, ones magnetic field\n" + "HLLD State: Left Double Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, 0.50001380657999994, -1, -1, -1.7347234759768071e-18, 0.0, 3.4694469519536142e-18, 3.4694469519536142e-18}; + std::vector const scalarFlux{1.5731381063233131e-14, 3.1670573744690958e-14, 4.7116290424753513e-14}; + Real thermalEnergyFlux = 0.; + std::vector 
const testFluxes = computeFluxes(onesMagneticField, + onesMagneticField, + gamma, + direction); + checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + } + } + // ========================================================================= + // ========================================================================= /*! * \brief Test the HLLD Riemann Solver with the degenerate state @@ -1626,18 +1659,18 @@ #ifdef SCALAR std::vector const conservedScalar{1.1069975296, 2.2286185018, 3.3155141875}; - negativePressure.insert(negativePressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeEnergy.insert(negativeEnergy.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeDensity.insert(negativeDensity.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeDensityEnergyPressure.insert(negativeDensityEnergyPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeDensityPressure.insert(negativeDensityPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); + negativePressure.insert(negativePressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); + negativeEnergy.insert(negativeEnergy.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); + negativeDensity.insert(negativeDensity.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); + negativeDensityEnergyPressure.insert(negativeDensityEnergyPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); + negativeDensityPressure.insert(negativeDensityPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + grid_enum::nscalars); #endif // SCALAR #ifdef DE - 
negativePressure.push_back(mhdUtils::computeThermalEnergy(negativePressure.at(4),negativePressure.at(0),negativePressure.at(1),negativePressure.at(2),negativePressure.at(3),negativePressure.at(5 + NSCALARS),negativePressure.at(6 + NSCALARS),negativePressure.at(7 + NSCALARS),gamma)); - negativeEnergy.push_back(mhdUtils::computeThermalEnergy(negativeEnergy.at(4),negativeEnergy.at(0),negativeEnergy.at(1),negativeEnergy.at(2),negativeEnergy.at(3),negativeEnergy.at(5 + NSCALARS),negativeEnergy.at(6 + NSCALARS),negativeEnergy.at(7 + NSCALARS),gamma)); - negativeDensity.push_back(mhdUtils::computeThermalEnergy(negativeDensity.at(4),negativeDensity.at(0),negativeDensity.at(1),negativeDensity.at(2),negativeDensity.at(3),negativeDensity.at(5 + NSCALARS),negativeDensity.at(6 + NSCALARS),negativeDensity.at(7 + NSCALARS),gamma)); - negativeDensityEnergyPressure.push_back(mhdUtils::computeThermalEnergy(negativeDensityEnergyPressure.at(4),negativeDensityEnergyPressure.at(0),negativeDensityEnergyPressure.at(1),negativeDensityEnergyPressure.at(2),negativeDensityEnergyPressure.at(3),negativeDensityEnergyPressure.at(5 + NSCALARS),negativeDensityEnergyPressure.at(6 + NSCALARS),negativeDensityEnergyPressure.at(7 + NSCALARS),gamma)); - negativeDensityPressure.push_back(mhdUtils::computeThermalEnergy(negativeDensityPressure.at(4),negativeDensityPressure.at(0),negativeDensityPressure.at(1),negativeDensityPressure.at(2),negativeDensityPressure.at(3),negativeDensityPressure.at(5 + NSCALARS),negativeDensityPressure.at(6 + NSCALARS),negativeDensityPressure.at(7 + NSCALARS),gamma)); + negativePressure.push_back(mhd::utils::computeThermalEnergy(negativePressure.at(4),negativePressure.at(0),negativePressure.at(1),negativePressure.at(2),negativePressure.at(3),negativePressure.at(grid_enum::magnetic_x),negativePressure.at(grid_enum::magnetic_y),negativePressure.at(grid_enum::magnetic_z),gamma)); + 
negativeEnergy.push_back(mhd::utils::computeThermalEnergy(negativeEnergy.at(4),negativeEnergy.at(0),negativeEnergy.at(1),negativeEnergy.at(2),negativeEnergy.at(3),negativeEnergy.at(grid_enum::magnetic_x),negativeEnergy.at(grid_enum::magnetic_y),negativeEnergy.at(grid_enum::magnetic_z),gamma)); + negativeDensity.push_back(mhd::utils::computeThermalEnergy(negativeDensity.at(4),negativeDensity.at(0),negativeDensity.at(1),negativeDensity.at(2),negativeDensity.at(3),negativeDensity.at(grid_enum::magnetic_x),negativeDensity.at(grid_enum::magnetic_y),negativeDensity.at(grid_enum::magnetic_z),gamma)); + negativeDensityEnergyPressure.push_back(mhd::utils::computeThermalEnergy(negativeDensityEnergyPressure.at(4),negativeDensityEnergyPressure.at(0),negativeDensityEnergyPressure.at(1),negativeDensityEnergyPressure.at(2),negativeDensityEnergyPressure.at(3),negativeDensityEnergyPressure.at(grid_enum::magnetic_x),negativeDensityEnergyPressure.at(grid_enum::magnetic_y),negativeDensityEnergyPressure.at(grid_enum::magnetic_z),gamma)); + negativeDensityPressure.push_back(mhd::utils::computeThermalEnergy(negativeDensityPressure.at(4),negativeDensityPressure.at(0),negativeDensityPressure.at(1),negativeDensityPressure.at(2),negativeDensityPressure.at(3),negativeDensityPressure.at(grid_enum::magnetic_x),negativeDensityPressure.at(grid_enum::magnetic_y),negativeDensityPressure.at(grid_enum::magnetic_z),gamma)); #endif //DE for (size_t direction = 0; direction < 3; direction++) @@ -1726,7 +1759,7 @@ // ========================================================================= // ========================================================================= - // Unit tests for the contents of the _hlldInternal namespace + // Unit tests for the contents of the mhd::_internal namespace // ========================================================================= /*! 
* \brief A struct to hold some basic test values @@ -1827,10 +1860,10 @@ { for (size_t i = 0; i < names.size(); i++) { - gasPressureL.push_back(mhdUtils::computeGasPressure(energyL[i], densityL[i], momentumXL[i], momentumYL[i], momentumZL[i], magneticXL[i], magneticYL[i], magneticZL[i], gamma)); - gasPressureR.push_back(mhdUtils::computeGasPressure(energyR[i], densityR[i], momentumXR[i], momentumYR[i], momentumZR[i], magneticXR[i], magneticYR[i], magneticZR[i], gamma)); - totalPressureL.push_back(mhdUtils::computeTotalPressure(gasPressureL.back(), magneticXL[i], magneticYL[i], magneticZL[i])); - totalPressureR.push_back(mhdUtils::computeTotalPressure(gasPressureL.back(), magneticXR[i], magneticYR[i], magneticZR[i])); + gasPressureL.push_back(mhd::utils::computeGasPressure(energyL[i], densityL[i], momentumXL[i], momentumYL[i], momentumZL[i], magneticXL[i], magneticYL[i], magneticZL[i], gamma)); + gasPressureR.push_back(mhd::utils::computeGasPressure(energyR[i], densityR[i], momentumXR[i], momentumYR[i], momentumZR[i], magneticXR[i], magneticYR[i], magneticZR[i], gamma)); + totalPressureL.push_back(mhd::utils::computeTotalPressure(gasPressureL.back(), magneticXL[i], magneticYL[i], magneticZL[i])); + totalPressureR.push_back(mhd::utils::computeTotalPressure(gasPressureL.back(), magneticXR[i], magneticYR[i], magneticZR[i])); } } }; @@ -1839,7 +1872,7 @@ // ========================================================================= /*! 
- * \brief Test the _hlldInternal::_approximateWaveSpeeds function + * \brief Test the mhd::_internal::_approximateWaveSpeeds function * */ TEST(tMHDHlldInternalApproximateWaveSpeeds, @@ -1850,7 +1883,7 @@ std::vector const fiducialSpeedR {24.295526347371595, 12.519790189404299}; std::vector const fiducialSpeedM {-0.81760587897407833, -0.026643804611559244}; std::vector const fiducialSpeedStarL {-19.710500632936679, -4.4880642018724357}; - std::vector const fiducialSpeedStarR {9.777062240423124, 9.17474383484066}; + std::vector const fiducialSpeedStarR {9.6740190040662242, 3.4191202933087519}; std::vector const fiducialDensityStarL{24.101290139122913, 50.132466596958501}; std::vector const fiducialDensityStarR{78.154104734671265, 84.041595114910123}; @@ -1864,7 +1897,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_approximateWaveSpeeds(parameters.densityL[i], + mhd::_internal::_approximateWaveSpeeds(parameters.densityL[i], parameters.momentumXL[i], parameters.momentumYL[i], parameters.momentumZL[i], @@ -1885,7 +1918,6 @@ parameters.velocityZR[i], parameters.gasPressureR[i], parameters.totalPressureR[i], - parameters.magneticXR[i], parameters.magneticYR[i], parameters.magneticZR[i], parameters.gamma, @@ -1924,7 +1956,7 @@ // ========================================================================= /*! - * \brief Test the _hlldInternal::_starFluxes function in the non-degenerate + * \brief Test the mhd::_internal::_starFluxes function in the non-degenerate * case * */ @@ -1961,7 +1993,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_starFluxes(parameters.speedM[i], + mhd::_internal::_starFluxes(parameters.speedM[i], parameters.speedSide[i], parameters.densityL[i], parameters.velocityXL[i], @@ -2038,7 +2070,7 @@ } /*! 
- * \brief Test the _hlldInternal::_starFluxes function in the degenerate + * \brief Test the mhd::_internal::_starFluxes function in the degenerate * case * */ @@ -2078,7 +2110,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_starFluxes(parameters.speedM[i], + mhd::_internal::_starFluxes(parameters.speedM[i], parameters.speedSide[i], parameters.densityL[i], parameters.velocityXL[i], @@ -2157,7 +2189,7 @@ // ========================================================================= /*! - * \brief Test the _hlldInternal::_nonStarFluxes function + * \brief Test the mhd::_internal::_nonStarFluxes function * */ TEST(tMHDHlldInternalNonStarFluxes, @@ -2183,7 +2215,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_nonStarFluxes(parameters.momentumXL[i], + mhd::_internal::_nonStarFluxes(parameters.momentumXL[i], parameters.velocityXL[i], parameters.velocityYL[i], parameters.velocityZL[i], @@ -2228,38 +2260,7 @@ // ========================================================================= /*! - * \brief Test the _hlldInternal::_dotProduct function - * - */ - TEST(tMHDHlldInternalDotProduct, - CorrectInputExpectCorrectOutput) - { - testParams const parameters; - - std::vector const fiducialDotProduct{5149.7597411033557,6127.2319832451567}; - - double testDotProduct; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - testDotProduct = _hlldInternal::_dotProduct(parameters.momentumXL[i], - parameters.momentumYL[i], - parameters.momentumZL[i], - parameters.magneticXL[i], - parameters.magneticYL[i], - parameters.magneticZL[i]); - - // Now check results - testingUtilities::checkResults(fiducialDotProduct[i], - testDotProduct, - parameters.names.at(i) + ", DotProduct"); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the _hlldInternal::_doubleStarState function. 
Non-degenerate + * \brief Test the mhd::_internal::_doubleStarState function. Non-degenerate * state * */ @@ -2286,7 +2287,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_doubleStarState(parameters.speedM[i], + mhd::_internal::_doubleStarState(parameters.speedM[i], parameters.magneticXL[i], parameters.totalPressureStarL[i], parameters.densityStarL[i], @@ -2332,7 +2333,7 @@ } /*! - * \brief Test the _hlldInternal::_doubleStarState function in the + * \brief Test the mhd::_internal::_doubleStarState function in the * degenerate state. * */ @@ -2357,7 +2358,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_doubleStarState(parameters.speedM[i], + mhd::_internal::_doubleStarState(parameters.speedM[i], 0.0, parameters.totalPressureStarL[i], parameters.densityStarL[i], @@ -2403,7 +2404,7 @@ // ========================================================================= /*! - * \brief Test the _hlldInternal::_doubleStarFluxes function + * \brief Test the mhd::_internal::_doubleStarFluxes function * */ TEST(tMHDHlldInternalDoubleStarFluxes, @@ -2428,7 +2429,7 @@ for (size_t i = 0; i < parameters.names.size(); i++) { - _hlldInternal::_doubleStarFluxes(parameters.speedSide[i], + mhd::_internal::_doubleStarFluxes(parameters.speedSide[i], parameters.momentumStarFluxX[i], parameters.momentumStarFluxY[i], parameters.momentumStarFluxZ[i], @@ -2480,7 +2481,7 @@ // ========================================================================= /*! 
- * \brief Test the _hlldInternal::_returnFluxes function + * \brief Test the mhd::_internal::_returnFluxes function * */ TEST(tMHDHlldInternalReturnFluxes, @@ -2532,10 +2533,10 @@ int const fiducialMomentumIndexY = threadId + n_cells * o2; int const fiducialMomentumIndexZ = threadId + n_cells * o3; int const fiducialEnergyIndex = threadId + n_cells * 4; - int const fiducialMagneticYIndex = threadId + n_cells * (o2 + 4 + NSCALARS); - int const fiducialMagneticZIndex = threadId + n_cells * (o3 + 4 + NSCALARS); + int const fiducialMagneticYIndex = threadId + n_cells * (grid_enum::magnetic_x); + int const fiducialMagneticZIndex = threadId + n_cells * (grid_enum::magnetic_y); - _hlldInternal::_returnFluxes(threadId, + mhd::_internal::_returnFluxes(threadId, o1, o2, o3, @@ -2578,4 +2579,5 @@ } } // ========================================================================= -#endif // CUDA & HLLD \ No newline at end of file +#endif // MHD +#endif // CUDA diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 72a6dc349..5ed2b050c 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -13,19 +13,17 @@ // Local includes #include "../system_tests/system_tester.h" #include "../utils/testing_utilities.h" - - +#include "../io/io.h" #ifndef PI #define PI 3.141592653589793 #endif - // ============================================================================= -// Test Suite: tHYDROSYSTEMSodShockTube +// Test Suite: tHYDROtMHDSYSTEMSodShockTube // ============================================================================= /*! 
- * \defgroup tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput + * \defgroup tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput * \brief Test the Sod Shock tube initial conditions as a parameterized test * with varying numbers of MPI ranks * @@ -42,6 +40,28 @@ class tHYDROSYSTEMSodShockTubeParameterizedMpi TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { + #ifdef MHD + // Loosen correctness check to account for MHD only having PCM. This is + // about the error between PCM and PPMP in hydro + sodTest.setFixedEpsilon(1E-3); + + // Don't test the gas energy fields + auto datasetNames = sodTest.getDataSetsToTest(); + datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end()); + + // Set the magnetic fiducial datasets to zero + size_t const size = std::pow(65, 3); + std::vector const magVec(0, size); + + for (auto field: {"magnetic_x","magnetic_y","magnetic_z"}) + { + sodTest.setFiducialData(field, magVec); + datasetNames.push_back(field); + } + + sodTest.setDataSetsToTest(datasetNames); + #endif //MHD + sodTest.numMpiRanks = GetParam(); sodTest.runTest(); } @@ -52,7 +72,7 @@ INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, /// @} // ============================================================================= -TEST(tHYDROSYSTEMConstant, +TEST(tHYDROtMHDSYSTEMConstant, CorrectInputExpectCorrectOutput) { systemTest::SystemTestRunner testObject(false, false, false); @@ -70,15 +90,15 @@ TEST(tHYDROSYSTEMConstant, } -TEST(tHYDROSYSTEMSoundWave3D, +TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) { double time = 0.05; double amplitude = 1e-5; double dx = 1./64.; - + double real_kx = 2*PI;//kx of the physical problem - + double kx = real_kx * dx; double speed = 1;//speed of wave is 1 since P = 0.6 and gamma = 1.666667 double phase = kx*0.5 - speed * time * real_kx; //kx*0.5 for half-cell offset @@ -86,12 +106,191 @@ 
TEST(tHYDROSYSTEMSoundWave3D, systemTest::SystemTestRunner testObject(false, false, false); + #ifdef MHD + // Loosen correctness check to account for MHD only having PCM. This is + // about the error between PCM and PPMP in hydro + tolerance = 1E-6; + #endif //MHD + testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticSine(testObject,"density",1.0,amplitude,kx,0.0,0.0,phase,tolerance); - testingUtilities::analyticSine(testObject,"momentum_x",0.0,amplitude,kx,0.0,0.0,phase,tolerance); + ASSERT_NO_FATAL_FAILURE(testingUtilities::analyticSine(testObject,"density",1.0,amplitude,kx,0.0,0.0,phase,tolerance)); + ASSERT_NO_FATAL_FAILURE(testingUtilities::analyticSine(testObject,"momentum_x",0.0,amplitude,kx,0.0,0.0,phase,tolerance)); //testingUtilities::analyticSine(testObject,"momentum_y",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); //testingUtilities::analyticSine(testObject,"momentum_z",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); } + +// ============================================================================= +// Test Suite: tHYDROtMHDSYSTEMLinearWavesParameterizedMpi +// ============================================================================= +/*! + * \defgroup tHYDROtMHDSYSTEMLinearWavesParameterizedMpi + * \brief Test the linear waves initial conditions as a parameterized test + * with varying numbers of MPI ranks. 
+ * + */ +/// @{ +class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi + :public + ::testing::TestWithParam +{ +public: + tHYDROtMHDSYSTEMLinearWavesParameterizedMpi() + : waveTest(false, true, false, false) + {}; +protected: + systemTest::SystemTestRunner waveTest; + + #ifdef PCM + double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; + #else //PCM + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; + #endif //PCM + + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, + double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, + double const &rEigenVec_MomentumZ, double const &rEigenVec_E, + double const &vx=0.0) + { + // Constant for all tests + size_t const N = 32; + double const domain = 0.5; + double const gamma = 5./3.; + double const tOut = 2*domain / waveSpeed; + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2*N)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(2*domain)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" 
zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx)); + waveTest.chollaLaunchParams.append(" vy=0"); + waveTest.chollaLaunchParams.append(" vz=0"); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1/gamma)); + waveTest.chollaLaunchParams.append(" Bx=0"); + waveTest.chollaLaunchParams.append(" By=0"); + waveTest.chollaLaunchParams.append(" Bz=0"); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=0"); + waveTest.chollaLaunchParams.append(" rEigenVec_By=0"); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=0"); + } +}; + +// Sound Waves Moving Left and Right +// ================================= +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, + SoundWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.; + int const numTimeSteps = 214; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 1; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_E = 1.5; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); + + // Set the number of 
timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(2*allowedL1Error, allowedError); +} + +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, + SoundWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.; + int const numTimeSteps = 214; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = -1; + double const rEigenVec_MomentumY = 1; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_E = 1.5; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E); + + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(2*allowedL1Error, allowedError); +} + +// Contact Waves Moving Left and Right +// =================================== +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, + HydroContactWaveCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + int const numTimeSteps = 427; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 0; + double const rEigenVec_E = 0.5; + double const velocityX = waveSpeed; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + velocityX); + + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +INSTANTIATE_TEST_SUITE_P(, + tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, + ::testing::Values(1)); +/// @} +// 
============================================================================= \ No newline at end of file diff --git a/src/system_tests/input_files/blank_settings_file.txt b/src/system_tests/input_files/blank_settings_file.txt new file mode 100644 index 000000000..e8fbd7e77 --- /dev/null +++ b/src/system_tests/input_files/blank_settings_file.txt @@ -0,0 +1,3 @@ +# This is blank file for system tests that are setting all the parameters +# internally to point at. Without a blank file cholla will crash + diff --git a/src/system_tests/input_files/tHYDROSYSTEMConstant_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMConstant_CorrectInputExpectCorrectOutput.txt similarity index 100% rename from src/system_tests/input_files/tHYDROSYSTEMConstant_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tHYDROtMHDSYSTEMConstant_CorrectInputExpectCorrectOutput.txt diff --git a/src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt similarity index 70% rename from src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt index f1c23ea6e..efdedaceb 100644 --- a/src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt @@ -14,7 +14,7 @@ tout=0.05 # time interval for output outstep=0.05 # name of initial conditions -init=Sound_Wave +init=Linear_Wave # domain properties xmin=0.0 ymin=0.0 @@ -34,18 +34,31 @@ outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x direction +# velocity in the x direction vx=0 # velocity in the y direction 
vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-5 # value of gamma gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=1 +rEigenVec_MomentumX=1 +rEigenVec_MomentumY=1 +rEigenVec_MomentumZ=1 +rEigenVec_E=1.5 +# Set the magnetic field quantities to zero +Bx=0 +By=0 +Bz=0 +rEigenVec_Bx=0 +rEigenVec_By=0 +rEigenVec_Bz=0 \ No newline at end of file diff --git a/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_MagneticFieldCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_MagneticFieldCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..eabea0e60 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_MagneticFieldCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,50 @@ +# +# Parameter File for 3D box filled with gas +# + +################################################ +# number of grid cells in the x dimension +nx=16 +# number of grid cells in the y dimension +ny=16 +# number of grid cells in the z dimension +nz=16 +# final output time +tout=100000.0 +# time interval for output +outstep=100000.0 +# name of initial conditions +init=Constant +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# density +rho=1e4 +# velocity +vx=0 +vy=0 +vz=0 +# pressure +P=1.380658e-5 +# Magnetic Field +Bx=1.0e-5 +By=2.0e-5 +Bz=3.0e-5 +# value of gamma +gamma=1.666666667 + diff --git a/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_ZeroMagneticFieldCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_ZeroMagneticFieldCorrectInputExpectCorrectOutput.txt new file mode 100644 index 
000000000..4f52b7cd6 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMConstantParameterizedMpi_ZeroMagneticFieldCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,50 @@ +# +# Parameter File for 3D box filled with gas +# + +################################################ +# number of grid cells in the x dimension +nx=16 +# number of grid cells in the y dimension +ny=16 +# number of grid cells in the z dimension +nz=16 +# final output time +tout=100000.0 +# time interval for output +outstep=100000.0 +# name of initial conditions +init=Constant +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# density +rho=1e4 +# velocity +vx=0 +vy=0 +vz=0 +# pressure +P=1.380658e-5 +# Magnetic Field +Bx=0.0 +By=0.0 +Bz=0.0 +# value of gamma +gamma=1.666666667 + diff --git a/src/system_tests/input_files/tMHDSYSTEMEinfeldtStrongRarefaction_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMEinfeldtStrongRarefaction_CorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..3e4747551 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMEinfeldtStrongRarefaction_CorrectInputExpectCorrectOutput.txt @@ -0,0 +1,71 @@ +# +# Parameter File for 3D Einfeldt Strong Rarefaction MHD test +# Citation: Einfeldt et al. 
1991 "On Godunov-Type Methods near Low Densities" +# + +################################################ +# number of grid cells in the x dimension +nx=32 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=32 +# final output time +tout=0.16 +# time interval for output +outstep=0.16 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=-2.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=0.45 +# Magnetic field of the left state +Bx_l=0.0 +By_l=0.5 +Bz_l=0.0 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=2.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=0.45 +# Magnetic field of the right state +Bx_r=0.0 +By_r=0.5 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.4 + diff --git a/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..71dd9bd91 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt @@ -0,0 +1,57 @@ +# +# Parameter File for 3D Sod Shock tube +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=Riemann +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 
+ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=0.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=1.0 +# density of right state +rho_r=0.1 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=0.1 +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.4 + diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp new file mode 100644 index 000000000..39cec0b89 --- /dev/null +++ b/src/system_tests/mhd_system_tests.cpp @@ -0,0 +1,636 @@ +/*! + * \file mhd_system_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains all the system tests for the MHD build type + * + */ + +// STL includes +#include + +// External Libraries and Headers +#include + +// Local includes +#include "../system_tests/system_tester.h" +#include "../io/io.h" + +// ============================================================================= +// Test Suite: tMHDSYSTEMConstantParameterizedMpi +// ============================================================================= +/*! 
+ * \defgroup tMHDSYSTEMConstantParameterizedMpi + * \brief Test the constant initial conditions as a parameterized test + * with varying numbers of MPI ranks + * + */ +/// @{ +class tMHDSYSTEMConstantParameterizedMpi + :public + ::testing::TestWithParam +{ +protected: + systemTest::SystemTestRunner constantTest; +}; + +// Test with all magnetic fields set to zero +TEST_P(tMHDSYSTEMConstantParameterizedMpi, + ZeroMagneticFieldCorrectInputExpectCorrectOutput) +{ + constantTest.numMpiRanks = GetParam(); + constantTest.runTest(); +} + +// Test with all magnetic fields set to one +TEST_P(tMHDSYSTEMConstantParameterizedMpi, + MagneticFieldCorrectInputExpectCorrectOutput) +{ + constantTest.numMpiRanks = GetParam(); + constantTest.runTest(); +} + +INSTANTIATE_TEST_SUITE_P(, + tMHDSYSTEMConstantParameterizedMpi, + ::testing::Values(1, 2, 4)); +/// @} +// ============================================================================= + +// ============================================================================= +// Test Suite: tMHDSYSTEMLinearWavesParameterizedAngle +// ============================================================================= +/*! + * \defgroup tMHDSYSTEMLinearWavesParameterizedAngle + * \brief Test the linear waves initial conditions as a parameterized test + * with varying angles. 
Details in Gardiner & Stone 2008 + * + */ +/// @{ +class tMHDSYSTEMLinearWavesParameterizedAngle + :public + ::testing::TestWithParam> +{ +public: + tMHDSYSTEMLinearWavesParameterizedAngle() + : waveTest(false, true, false, false){}; +protected: + systemTest::SystemTestRunner waveTest; + + #ifdef PCM + double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; + #else //PCM + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; + #endif //PCM + + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, + double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, + double const &rEigenVec_MomentumZ, double const &rEigenVec_E, + double const &rEigenVec_Bx, double const &rEigenVec_By, + double const &rEigenVec_Bz, double const &pitch, + double const &yaw, double const &domain, + int const &domain_direction, double const &vx=0.0) + { + // Constant for all tests + size_t const N = 32; + double const gamma = 5./3.; + double const tOut = 2*domain / waveSpeed; + + // Define vector values + double x_len=domain, y_len=domain, z_len=domain; + int nx=N, ny=N, nz=N; + double vx_rot=vx, vy_rot=0, vz_rot=0; + double Bx_rot=1, By_rot=1.5, Bz_rot=0; + + double rEigenVec_Bx_rot = rEigenVec_Bx; + double rEigenVec_By_rot = rEigenVec_By; + double rEigenVec_Bz_rot = rEigenVec_Bz; + + double rEigenVec_MomentumX_rot = rEigenVec_MomentumX; + double rEigenVec_MomentumY_rot = rEigenVec_MomentumY; + double rEigenVec_MomentumZ_rot = rEigenVec_MomentumZ; + + switch (domain_direction) + { + case 1: + x_len *= 2; + nx *= 2; + break; + case 2: // swap X and Y + y_len *= 2; + ny *= 2; + std::swap(vx_rot, vy_rot); + std::swap(Bx_rot, By_rot); + std::swap(rEigenVec_Bx_rot, rEigenVec_By_rot); + std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumY_rot); + break; + case 3: // swap X and Z + z_len *= 2; + nz *= 2; + std::swap(vx_rot, vz_rot); + 
std::swap(Bx_rot, Bz_rot); + std::swap(rEigenVec_Bx_rot, rEigenVec_Bz_rot); + std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumZ_rot); + break; + default: + throw std::invalid_argument("Invalid value of domain_direction given to setLaunchParams"); + break; + } + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(nx)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(ny)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(nz)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(x_len)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(y_len)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(z_len)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx_rot)); + waveTest.chollaLaunchParams.append(" vy=" + to_string_exact(vy_rot)); + waveTest.chollaLaunchParams.append(" vz=" + to_string_exact(vz_rot)); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1/gamma)); + waveTest.chollaLaunchParams.append(" Bx=" + to_string_exact(Bx_rot)); + waveTest.chollaLaunchParams.append(" By=" + to_string_exact(By_rot)); + waveTest.chollaLaunchParams.append(" Bz=" + to_string_exact(Bz_rot)); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + 
waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact(rEigenVec_By_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz_rot)); + waveTest.chollaLaunchParams.append(" pitch=" + to_string_exact(pitch)); + waveTest.chollaLaunchParams.append(" yaw=" + to_string_exact(yaw)); + } +}; + +// Fast Magnetosonic Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + FastMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 2.; + std::vector const numTimeSteps = {214, 204, 220}; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = prefix * 2; + double const rEigenVec_MomentumX = prefix * 4; + double const rEigenVec_MomentumY = prefix * -2; // + for left wave + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * 4; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 9; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + 
rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + #ifdef PCM + waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); + #else //PCM + waveTest.runL1ErrorTest(allowedL1Error, allowedError); + #endif //PCM +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + FastMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 2.; + std::vector const numTimeSteps = {214, 204, 220}; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = prefix * 2; + double const rEigenVec_MomentumX = prefix * -4; + double const rEigenVec_MomentumY = prefix * 2; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * 4; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 9; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + #ifdef PCM + waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); + #else //PCM + waveTest.runL1ErrorTest(allowedL1Error, allowedError); + #endif //PCM +} + +// Slow Magnetosonic Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + std::vector const numTimeSteps = {854, 813, 880}; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = 
prefix * 4; + double const rEigenVec_MomentumX = prefix * 2; + double const rEigenVec_MomentumY = prefix * 4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + std::vector const numTimeSteps = {854, 813, 880}; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * -2; + double const rEigenVec_MomentumY = prefix * -4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + 
+// Alfven Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + AlfvenWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {427, 407, 440}; + + double const rEigenVec_rho = 0; + double const rEigenVec_MomentumX = 0; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = -1; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 1; + double const rEigenVec_E = 0; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + AlfvenWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {427, 407, 440}; + + double const rEigenVec_rho = 0; + double const rEigenVec_MomentumX = 0; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 1; + double const rEigenVec_E = 0; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction); + + // Set the number of timesteps + 
waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +// Contact Wave Moving Right +// =================================== +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, + MHDContactWaveCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {641, 620, 654}; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 0; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 0; + double const rEigenVec_E = 0.5; + double const velocityX = waveSpeed; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, + domain_direction, velocityX); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction-1]); + + // Check Results + #ifdef PCM + waveTest.runL1ErrorTest(1.35*allowedL1Error, 1.35*allowedError); + #else //PCM + waveTest.runL1ErrorTest(allowedL1Error, allowedError); + #endif //PCM +} + +INSTANTIATE_TEST_SUITE_P(, + tMHDSYSTEMLinearWavesParameterizedAngle, + ::testing::Values( + std::make_tuple(0.0*M_PI, 0.0*M_PI, 0.5, 1), + std::make_tuple(0.0*M_PI, 0.5*M_PI, 0.5, 2), + std::make_tuple(0.5*M_PI, 0.0*M_PI, 0.5, 3) + //std::make_tuple(std::asin(2./3.), std::asin(2./std::sqrt(5.)), 1.5, 1) + )); +/// @} +// ============================================================================= + +// ============================================================================= +// Test Suite: tMHDSYSTEMSodShockTube +// TODO: This is temporary. 
Remove once PPMP is implemented for MHD and replace +// with the hydro sod test +// ============================================================================= +/*! + * \defgroup tMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput + * \brief Test the Sod Shock tube initial conditions as a parameterized test + * with varying numbers of MPI ranks + * + */ +/// @{ +class tMHDSYSTEMSodShockTubeParameterizedMpi + :public + ::testing::TestWithParam +{ +protected: + systemTest::SystemTestRunner sodTest; +}; + +TEST_P(tMHDSYSTEMSodShockTubeParameterizedMpi, + CorrectInputExpectCorrectOutput) +{ + sodTest.numMpiRanks = GetParam(); + sodTest.runTest(); +} + +INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, + tMHDSYSTEMSodShockTubeParameterizedMpi, + ::testing::Values(1, 2, 4)); +/// @} +// ============================================================================= + +// ============================================================================= +// Test Suite: tMHDSYSTEMEinfeldtStrongRarefaction +// ============================================================================= +TEST(tMHDSYSTEMEinfeldtStrongRarefaction, + CorrectInputExpectCorrectOutput) +{ + systemTest::SystemTestRunner rarefactionTest; + rarefactionTest.runTest(); +} +// ============================================================================= + +// ============================================================================= +// Test Suite: tMHDSYSTEMLinearWavesParameterizedMpi +// ============================================================================= +/*! + * \defgroup tMHDSYSTEMLinearWavesParameterizedMpi + * \brief Test the linear waves initial conditions as a parameterized test + * with varying numbers of MPI ranks. 
Details in Gardiner & Stone 2008 + * + */ +/// @{ +class tMHDSYSTEMLinearWavesParameterizedMpi + :public + ::testing::TestWithParam +{ +public: + tMHDSYSTEMLinearWavesParameterizedMpi() + : waveTest(false, true, false, false){}; +protected: + systemTest::SystemTestRunner waveTest; + + #ifdef PCM + double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4E-7; + #else //PCM + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; + #endif //PCM + + void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, + double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, + double const &rEigenVec_MomentumZ, double const &rEigenVec_E, + double const &rEigenVec_Bx, double const &rEigenVec_By, + double const &rEigenVec_Bz) + { + // Constant for all tests + size_t const N = 32; + double const gamma = 5./3.; + double const domain = 0.5; + double const tOut = 2*domain / waveSpeed; + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2*N)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(2*domain)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + 
waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=0"); + waveTest.chollaLaunchParams.append(" vy=0"); + waveTest.chollaLaunchParams.append(" vz=0"); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1/gamma)); + waveTest.chollaLaunchParams.append(" Bx=1"); + waveTest.chollaLaunchParams.append(" By=1.5"); + waveTest.chollaLaunchParams.append(" Bz=0"); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx)); + waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact(rEigenVec_By)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz)); + } +}; + +// Slow Magnetosonic Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, + SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + int const numTimeSteps = 854; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * 2; + double const rEigenVec_MomentumY = prefix * 4; + double const rEigenVec_MomentumZ = prefix * 0; + double const 
rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + waveTest.numMpiRanks = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, + SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + int const numTimeSteps = 854; + + double const prefix = 1./(2*std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * -2; + double const rEigenVec_MomentumY = prefix * -4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + waveTest.numMpiRanks = GetParam(); + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, + rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +INSTANTIATE_TEST_SUITE_P(, + tMHDSYSTEMLinearWavesParameterizedMpi, + ::testing::Values(1, 2, 4)); +/// @} +// ============================================================================= \ No newline at end of file diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index c59e6e770..a2835ce7c 100644 --- 
a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -21,6 +21,7 @@ // Local includes #include "../system_tests/system_tester.h" // Include the header file #include "../utils/testing_utilities.h" +#include "../io/io.h" // ============================================================================= // Public Members @@ -154,8 +155,7 @@ void systemTest::SystemTestRunner::runTest() else { // This is a field data set - testData = loadTestFieldData(dataSetName, - testDims); + testData = loadTestFieldData(dataSetName, testDims); // Get fiducial data fiducialData = _loadFiducialFieldData(dataSetName); } @@ -178,14 +178,11 @@ void systemTest::SystemTestRunner::runTest() // Check for equality and iff not equal return difference double absoluteDiff; int64_t ulpsDiff; - // Fixed epsilon is changed from the default since AMD/Clang - // appear to differ from NVIDIA/GCC/XL by roughly 1E-12 - double fixedEpsilon = 5.0E-12; bool areEqual = testingUtilities::nearlyEqualDbl(fiducialData.at(index), testData.at(index), absoluteDiff, ulpsDiff, - fixedEpsilon); + _fixedEpsilon); ASSERT_TRUE(areEqual) << std::endl << "Difference in " @@ -203,6 +200,136 @@ void systemTest::SystemTestRunner::runTest() } // ============================================================================= +// ============================================================================= +void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Error, + double const &maxAllowedError) +{ + /// Only run if this variable is set to `true`. Generally this and + /// globalCompareSystemTestResults should only be used for large MPI / tests + /// where the user wishes to separate the execution of cholla and the / + /// comparison of results onto different machines/jobs + if (globalRunCholla) + { + // Launch Cholla. Note that this dumps all console output to the console + // log file as requested by the user. 
+ launchCholla(); + } + + // Check that there is hydro data and no particle data + if (_particleDataExists) + { + std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest does not support particles"; + throw std::runtime_error(errMessage); + } + if (not _hydroDataExists) + { + std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest requires hydro data"; + throw std::runtime_error(errMessage); + } + + /// If set to false then no comparison will be performed. Generally this and + /// globalRunCholla should only be used for large MPI tests where the user + /// wishes to separate the execution of cholla and the comparison of results + /// onto different machines/jobs + if (not globalCompareSystemTestResults) return; + + // Make sure we have all the required data files and open the data files + _testHydroFieldsFileVec.resize(numMpiRanks); + std::vector initialHydroFieldsFileVec(numMpiRanks); + for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) + { + // Initial time data + std::string fileName = "/0.h5." + std::to_string(fileIndex); + _checkFileExists(_outputDirectory + fileName); + initialHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, + H5F_ACC_RDONLY); + + // Final time data + fileName = "/1.h5." + std::to_string(fileIndex); + _checkFileExists(_outputDirectory + fileName); + _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, + H5F_ACC_RDONLY); + } + + // Get the list of test dataset names + _fiducialDataSetNames = _findDataSetNames(initialHydroFieldsFileVec[0]); + _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); + + // Start Performing Checks + // ======================= + // Check the number of time steps + if (_compareNumTimeSteps) _checkNumTimeSteps(); + + // Check that the test file has as many, or more, datasets than the fiducial + // file. 
Provide a warning if the datasets are not the same size + EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size()) + << std::endl + << "Warning: The test data has " + << _testDataSetNames.size() + << " datasets and the fiducial data has " + << _fiducialDataSetNames.size() + << " datasets" << std::endl << std::endl; + + // Loop over the datasets to be tested + double L2Norm = 0; + double maxError = 0; + for (auto dataSetName: _fiducialDataSetNames) + { + if (dataSetName == "GasEnergy") + { + continue; + } + + // check that the test data has the dataset in it + ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1) + << "The test data does not contain the dataset '" + dataSetName + + "' or contains it more than once."; + + // Get data vectors + std::vector initialDims(3,1); + std::vector initialData; + std::vector finalDims(3,1); + std::vector finalData; + + // This is a field data set + initialData = loadTestFieldData(dataSetName, initialDims, initialHydroFieldsFileVec); + // Get fiducial data + finalData = loadTestFieldData(dataSetName, finalDims, _testHydroFieldsFileVec); + + // Check that they're the same length + ASSERT_EQ(initialData.size(), finalData.size()) + << "The initial and final '" + << dataSetName + << "' datasets are not the same length"; + + // Compute the L1 Error. + double L1Error = 0; + for (size_t i = 0; i < initialData.size(); i++) + { + double const diff = std::abs(initialData.at(i) - finalData.at(i)); + L1Error += diff; + maxError = (diff > maxError)? 
diff: maxError; + } + + L1Error *= (1./static_cast(initialDims[0]*initialDims[1]*initialDims[2])); + L2Norm += L1Error * L1Error; + + // Perform the correctness check + EXPECT_LT(L1Error, maxAllowedL1Error) << "the L1 error for the " + << dataSetName + << " data has exceeded the allowed value"; + } + + // Check the L1 Norm + L2Norm = std::sqrt(L2Norm); + EXPECT_LT(L2Norm, maxAllowedL1Error) + << "the norm of the L1 error vector has exceeded the allowed value"; + + // Check the Max Error + EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; +} +// ============================================================================= + // ============================================================================= void systemTest::SystemTestRunner::launchCholla() { @@ -250,11 +377,11 @@ void systemTest::SystemTestRunner::setFiducialData(std::string const &fieldName, std::vector const &dataVec) { // First check if there's a fiducial data file - if (_fiducialFileExists) + if (_fiducialDataSets.count(fieldName) > 0) { - std::string errMessage = "Error: Fiducial data file already exists for test '" - + _fullTestFileName - + "' and cannot be overwritten."; + std::string errMessage = "Error: Fiducial dataset for field '" + + fieldName + + "' already exists and cannot be overwritten"; throw std::runtime_error(errMessage); } @@ -333,17 +460,41 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, _fullTestFileName = fullTestName.substr(0, fullTestName.find("/")); // Generate the input paths. Strip out everything after a "/" since that - // probably indicates a parameterized test + // probably indicates a parameterized test. Also, check that the files exist + // and load fiducial HDF5 file if required _chollaPath = ::globalChollaRoot.getString() + "/bin/cholla." + ::globalChollaBuild.getString() + "." 
+ ::globalChollaMachine.getString(); - _chollaSettingsPath = ::globalChollaRoot.getString() - + "/src/system_tests/input_files/" - + _fullTestFileName + ".txt"; + _checkFileExists(_chollaPath); + if (useSettingsFile) + { + _chollaSettingsPath = ::globalChollaRoot.getString() + + "/src/system_tests/input_files/" + + _fullTestFileName + ".txt"; + _checkFileExists(_chollaSettingsPath); + } + else + { + _chollaSettingsPath = ::globalChollaRoot.getString() + + "/src/system_tests/input_files/" + + "blank_settings_file.txt"; + _checkFileExists(_chollaSettingsPath); + } + if (useFiducialFile) + { _fiducialFilePath = ::globalChollaRoot.getString() + "/cholla-tests-data/system_tests/" + _fullTestFileName + ".h5"; + _checkFileExists(_fiducialFilePath); + _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY); + _fiducialDataSetNames = _findDataSetNames(_fiducialFile); + _fiducialFileExists = true; + } + else + { + _fiducialFilePath = ""; + } // Generate output paths, these files don't exist yet _outputDirectory = ::globalChollaRoot.getString() + "/bin/" + fullTestName; @@ -360,17 +511,6 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, + "' either already exists or could not be created." 
<< std::endl; } - - // Check that the files exist and load fiducial HDF5 file if required - _checkFileExists(_chollaPath); - if (useSettingsFile) _checkFileExists(_chollaSettingsPath); - if (useFiducialFile) - { - _checkFileExists(_fiducialFilePath); - _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY); - _fiducialDataSetNames = _findDataSetNames(_fiducialFile); - _fiducialFileExists = true; - }; } // ============================================================================= @@ -463,22 +603,25 @@ void systemTest::SystemTestRunner::_checkNumTimeSteps() // ============================================================================= std::vector systemTest::SystemTestRunner::loadTestFieldData( std::string dataSetName, - std::vector &testDims) + std::vector &testDims, + std::vector file) { - // Get the file we're using - std::vector file; + // Switch which fileset we're using if it's a particle dataset if (dataSetName == "particle_density") { file = _testParticlesFileVec; dataSetName = "density"; } - else + else if (file.size() == 0) { file = _testHydroFieldsFileVec; } - // Get the size of each dimension - H5::Attribute dimensions = file[0].openAttribute("dims"); + // Get the size of each dimension. First check if the field is a magnetic + // field or not to make sure we're retrieving the right dimensions + std::string dimsName = (dataSetName.find("magnetic") != std::string::npos)? + "magnetic_field_dims": "dims"; + H5::Attribute dimensions = file[0].openAttribute(dimsName.c_str()); dimensions.read(H5::PredType::NATIVE_ULONG, testDims.data()); // Allocate the vector @@ -510,7 +653,9 @@ std::vector systemTest::SystemTestRunner::loadTestFieldData( // Get dims_local std::vector dimsLocal(3,1); - H5::Attribute dimsLocalAttr = file[rank].openAttribute("dims_local"); + std::string dimsNameLocal = (dataSetName.find("magnetic") != std::string::npos)? 
+ "magnetic_field_dims_local": "dims_local"; + H5::Attribute dimsLocalAttr = file[rank].openAttribute(dimsNameLocal.c_str()); dimsLocalAttr.read(H5::PredType::NATIVE_INT, dimsLocal.data()); // Now we add the data to the larger vector diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h index 6d5aa1925..29b8b74d0 100644 --- a/src/system_tests/system_tester.h +++ b/src/system_tests/system_tester.h @@ -72,9 +72,24 @@ class systemTest::SystemTestRunner */ void runTest(); + /*! + * \brief Compute the L1 error for each field compared to the initial + * conditions. Doesn't work with particle data + * + * \param[in] maxAllowedL1Error The maximum allowed L1 error for this test + * \param[in] maxAllowedError The maximum allowed for any value in the test + * + */ + void runL1ErrorTest(double const &maxAllowedL1Error, double const &maxAllowedError=1E-7); + + /*! + * \brief Launch Cholla as it is set up + * + */ void launchCholla(); void openHydroTestData(); + /*! * \brief Get the Cholla Path object * @@ -128,6 +143,13 @@ class systemTest::SystemTestRunner */ std::vector getDataSetsToTest(){return _fiducialDataSetNames;}; + /*! + * \brief Set the Fixed Epsilon value + * + * \param[in] newVal The new value of fixed epsilon + */ + void setFixedEpsilon(double const &newVal){_fixedEpsilon = newVal;}; + /*! * \brief Choose which datasets to test. By default it tests all the * datasets in the fiducial data. A warning will be thrown if not all the @@ -193,10 +215,12 @@ class systemTest::SystemTestRunner * * \param[in] dataSetName The name of the dataset to get * \param[out] testDims An vector with the length of each dimension in it + * \param[in] file (optional) The vector of HDF5 files to load * \return std::vector A vector containing the data */ std::vector loadTestFieldData(std::string dataSetName, - std::vector &testDims); + std::vector &testDims, + std::vector file={}); /*! 
* \brief Generate a std::vector of the specified size populated by a sine @@ -241,7 +265,7 @@ class systemTest::SystemTestRunner * \param[in] useSettingsFile Indicate if you're using a settings file. If * `true` then the settings file is automatically found based on the naming * convention. If false then the user MUST provide all the required settings - * with the SystemTestRunner::setChollaLaunchParams method + * with the SystemTestRunner::chollaLaunchParams member variable */ SystemTestRunner(bool const &particleData=false, bool const &hydroData=true, @@ -290,6 +314,10 @@ class systemTest::SystemTestRunner /// The total number of particles in the fiducial dataset size_t _fiducialTotalNumParticles=0; + /// Fixed epsilon is changed from the default since AMD/Clang + /// appear to differ from NVIDIA/GCC/XL by roughly 1E-12 + double _fixedEpsilon = 5.0E-12; + /// Flag to indicate if a fiducial HDF5 data file is being used or a /// programmatically generated H5File object. `true` = use a file, `false` = /// use generated H5File object diff --git a/src/utils/DeviceVector.h b/src/utils/DeviceVector.h index 422f3d151..ca0cacba8 100644 --- a/src/utils/DeviceVector.h +++ b/src/utils/DeviceVector.h @@ -1,5 +1,5 @@ /*! - * \file device_vector.h + * \file DeviceVector.h * \author Robert 'Bob' Caddy (rvc@pitt.edu) * \brief Contains the declartion and implementation of the DeviceVector * class. Note that since this is a templated class the implementation must be @@ -48,8 +48,10 @@ namespace cuda_utilities * * \param[in] size The number of elements desired in the array. Can be * any positive integer. + * \param[in] initialize (optional) If true then initialize the GPU + * memory to int(0) */ - DeviceVector(size_t const size) {_allocate(size);} + DeviceVector(size_t const size, bool const initialize=false); /*! 
* \brief Destroy the Device Vector object by calling the `_deAllocate` @@ -178,7 +180,7 @@ namespace cuda_utilities void _allocate(size_t const size) { _size=size; - CudaSafeCall(cudaMalloc(&_ptr, size*sizeof(T))); + CudaSafeCall(cudaMalloc(&_ptr, _size*sizeof(T))); } /*! @@ -198,11 +200,23 @@ namespace cuda_utilities // ============================================================================= namespace cuda_utilities { - // ========================================================================= // Public Methods // ========================================================================= + // ========================================================================= + template + DeviceVector::DeviceVector(size_t const size, bool const initialize) + { + _allocate(size); + + if (initialize) + { + CudaSafeCall(cudaMemset(_ptr, 0, _size*sizeof(T))); + } + } + // ========================================================================= + // ========================================================================= template void DeviceVector::resize(size_t const newSize) diff --git a/src/utils/DeviceVector_tests.cu b/src/utils/DeviceVector_tests.cu index 26a63dbca..3db21baee 100644 --- a/src/utils/DeviceVector_tests.cu +++ b/src/utils/DeviceVector_tests.cu @@ -1,5 +1,5 @@ /*! 
- * \file device_vector_tests.cu + * \file DeviceVector_tests.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) * \brief Tests for the DeviceVector class * @@ -81,7 +81,7 @@ TEST(tALLDeviceVectorDestructor, // Get the pointer information cudaPointerAttributes ptrAttributes; - CudaSafeCall(cudaPointerGetAttributes(&ptrAttributes, devVector.data())); + cudaPointerGetAttributes(&ptrAttributes, devVector.data()); // Warning strings std::string typeMessage = "ptrAttributes.type should be 0 since " @@ -106,6 +106,9 @@ TEST(tALLDeviceVectorDestructor, #endif // O_HIP EXPECT_EQ(nullptr, ptrAttributes.devicePointer) << devPtrMessage; EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage; + + // Reconstruct DeviceVector object to avoid error + new (&devVector) cuda_utilities::DeviceVector{vectorSize}; } TEST(tALLDeviceVectorStdVectorHostToDeviceCopyAndIndexing, diff --git a/src/utils/cuda_utilities.h b/src/utils/cuda_utilities.h index 9c07a95a6..3f0ae5fba 100644 --- a/src/utils/cuda_utilities.h +++ b/src/utils/cuda_utilities.h @@ -12,7 +12,6 @@ #include "../global/global_cuda.h" #include "../utils/gpu.hpp" - namespace cuda_utilities { /*! @@ -75,25 +74,61 @@ namespace cuda_utilities } } - // ========================================================================= /*! - * \brief Set the value that `pointer` points at in GPU memory to `value`. - * This only sets the first value in memory so if `pointer` points to an - * array then only `pointer[0]` will be set; i.e. 
this effectively does - * `pointer = &value` - * - * \tparam T Any scalar type - * \param[in] pointer The location in GPU memory - * \param[in] value The value to set `*pointer` to - */ - template - void setScalarDeviceMemory(T *pointer, T const value) + * \brief Initialize GPU memory + * + * \param[in] ptr The pointer to GPU memory + * \param[in] N The size of the array in bytes + */ + inline void initGpuMemory(Real *ptr, size_t N) { - CudaSafeCall( - cudaMemcpy(pointer, // destination - &value, // source - sizeof(T), - cudaMemcpyHostToDevice)); + CudaSafeCall(cudaMemset(ptr, 0, N)); } - // ========================================================================= -} \ No newline at end of file + + // ===================================================================== + /*! + * \brief Struct to determine the optimal number of blocks and threads + * per block to use when launching a kernel. The member + * variables are `threadsPerBlock` and `numBlocks` which are chosen with + the occupancy API. Can target any device on the system through the + * optional constructor argument. + * NOTE: On AMD there's currently an issue that stops kernels from being + * passed. As a workaround for now this struct just returns the maximum + * number of blocks and threads per block that a MI250X can run at once. + * + */ + template + struct AutomaticLaunchParams + { + public: + /*! + * \brief Construct a new Reduction Launch Params object. By default it + * generates values of numBlocks and threadsPerBlock suitable for a + * kernel with a grid-stride loop. 
For a kernel with one thread per + * element set the optional `numElements` argument to the number of + * elements + * + * \param[in] kernel The kernel to determine the launch parameters for + * \param[in] numElements The number of elements in the array that + the kernel operates on + */ + AutomaticLaunchParams(T &kernel, size_t numElements=0) + { + cudaOccupancyMaxPotentialBlockSize(&numBlocks, &threadsPerBlock, kernel, 0, 0); + + if (numElements > 0) + { + numBlocks = (numElements + threadsPerBlock - 1) / threadsPerBlock; + } + } + + /// Defaulted Destructor + ~AutomaticLaunchParams()=default; + + /// The maximum number of threads per block that the device supports + int threadsPerBlock; + /// The maximum number of scheduleable blocks on the device + int numBlocks; + }; + // ===================================================================== +} // end namespace cuda_utilities diff --git a/src/utils/cuda_utilities_tests.cpp b/src/utils/cuda_utilities_tests.cpp index ddefebfd7..dc2f20066 100644 --- a/src/utils/cuda_utilities_tests.cpp +++ b/src/utils/cuda_utilities_tests.cpp @@ -120,23 +120,3 @@ TEST(tALLCompute1DIndex, EXPECT_EQ(fiducialId, testId); } // ============================================================================= - -// ============================================================================= -TEST(tALLSetScalarDeviceMemory, - TypeDoubleInputExpectCorrectValueSet) -{ - double value = 173.246; - double *dev_ptr, host_val; - CudaSafeCall(cudaMalloc(&dev_ptr, sizeof(double))); - - cuda_utilities::setScalarDeviceMemory(dev_ptr, value); - - CudaSafeCall( - cudaMemcpy(&host_val, // destination - dev_ptr, // source - sizeof(double), - cudaMemcpyDeviceToHost)); - - EXPECT_EQ(value, host_val); -} -// ============================================================================= diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index 4c285965e..461f9821b 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -20,7 +20,7 @@ static void 
__attribute__((unused)) check(const hipfftResult err, const char *co exit(err); } -#endif // PARIS PARIC_GALACTIC +#endif //CUFFT PARIS PARIS_GALACTIC #define WARPSIZE 64 static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; @@ -65,10 +65,11 @@ static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; #define cudaGetDeviceProperties hipGetDeviceProperties #define cudaPointerAttributes hipPointerAttribute_t #define cudaPointerGetAttributes hipPointerGetAttributes +#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize // Texture definitions #define cudaArray hipArray -#define cudaMallocArray hipMallocArray +#define cudaMallocArray hipMallocArray #define cudaFreeArray hipFreeArray #define cudaMemcpyToArray hipMemcpyToArray #define cudaMemcpy2DToArray hipMemcpy2DToArray @@ -87,8 +88,10 @@ static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; #define cudaTextureDesc hipTextureDesc #define cudaAddressModeClamp hipAddressModeClamp #define cudaFilterModeLinear hipFilterModeLinear -#define cudaFilterModePoint hipFilterModePoint +#define cudaFilterModePoint hipFilterModePoint // Texture Definitions +#define cudaPointerAttributes hipPointerAttribute_t +#define cudaPointerGetAttributes hipPointerGetAttributes // FFT definitions #define cufftDestroy hipfftDestroy diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 990eb2a83..b89175835 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -71,6 +71,40 @@ namespace hydro_utilities { return P; } + /*! + * \brief Compute the kinetic energy from the density and velocities + * + * \param[in] d The density + * \param[in] vx The x velocity + * \param[in] vy The y velocity + * \param[in] vz The z velocity + * \return Real The kinetic energy + */ + inline __host__ __device__ Real Calc_Kinetic_Energy_From_Velocity(Real const &d, + Real const &vx, + Real const &vy, + Real const &vz) + { + return 0.5 * d * (vx*vx + vy*vy + vz*vz); + } + + /*!
+ * \brief Compute the kinetic energy from the density and momenta + * + * \param[in] d The density + * \param[in] mx The x momentum + * \param[in] my The y momentum + * \param[in] mz The z momentum + * \return Real The kinetic energy + */ + inline __host__ __device__ Real Calc_Kinetic_Energy_From_Momentum(Real const &d, + Real const &mx, + Real const &my, + Real const &mz) + { + return (0.5 / d) * (mx*mx + my*my + mz*mz); + } + inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) { Real P = Calc_Pressure_Conserved(E, d, mx, my, mz, gamma); return sqrt(gamma * P / d); diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp index e8a066d12..e0e3cf455 100644 --- a/src/utils/hydro_utilities_tests.cpp +++ b/src/utils/hydro_utilities_tests.cpp @@ -127,3 +127,45 @@ TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) { testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); } } + +TEST(tHYDROtMHDCalcKineticEnergyFromVelocity, CorrectInputExpectCorrectOutput) { + TestParams parameters; + std::vector fiducialEnergies{0.0, + 6.307524975350106e-145, + 7.3762470327090601e+249}; + double const coef = 1E-50; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( + coef*parameters.d.at(i), + coef*parameters.vx.at(i), + coef*parameters.vy.at(i), + coef*parameters.vz.at(i)); + + testingUtilities::checkResults(fiducialEnergies.at(i), + testEnergy, + parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDCalcKineticEnergyFromMomentum, CorrectInputExpectCorrectOutput) { + TestParams parameters; + std::vector fiducialEnergies{0.0, + 0.0, + 7.2568536478335773e+147}; + double const coef = 1E-50; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Momentum( +
coef*parameters.d.at(i), + coef*parameters.mx.at(i), + coef*parameters.my.at(i), + coef*parameters.mz.at(i)); + + testingUtilities::checkResults(fiducialEnergies.at(i), + testEnergy, + parameters.names.at(i)); + } +} \ No newline at end of file diff --git a/src/utils/math_utilities.h b/src/utils/math_utilities.h new file mode 100644 index 000000000..735cec996 --- /dev/null +++ b/src/utils/math_utilities.h @@ -0,0 +1,92 @@ +/*! + * \file math_utilities.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains various functions for common mathematical operations + * + */ + +#pragma once + +// STL Includes +#include +#include + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" + +namespace math_utils +{ + // ========================================================================= + /*! + * \brief Rotate cartesian coordinates. All arguments are cast to double + * then rotated. If the type is 'int' then the value is rounded to the + * nearest int + * + * \details Rotation such that when pitch=90 and yaw=0 x1_rot = -x3 and when + * pitch=0 and yaw=90 x1_rot = -x2 + * + * \tparam T The return type + * \param[in] x_1 x1 coordinate + * \param[in] x_2 x2 coordinate + * \param[in] x_3 x3 coordinate + * \param[in] pitch Pitch angle in radians + * \param[in] yaw Yaw angle in radians + * \return std::tuple The new, rotated, coordinates in the + * order . Intended to be captured with structured binding + */ + template + inline std::tuple rotateCoords(Real const &x_1, Real const &x_2, + Real const &x_3, Real const &pitch, Real const &yaw) + { + // Compute the sines and cosines. Correct for floating point errors if + // the angle is 0.5*M_PI + Real const sin_yaw = std::sin(yaw); + Real const cos_yaw = (yaw==0.5*M_PI)? 0: std::cos(yaw); + Real const sin_pitch = std::sin(pitch); + Real const cos_pitch = (pitch==0.5*M_PI)? 
0: std::cos(pitch); + + // Perform the rotation + Real const x_1_rot = (x_1 * cos_pitch * cos_yaw) + (x_2 * sin_yaw) + (x_3 * sin_pitch * cos_yaw); + Real const x_2_rot = (x_1 * cos_pitch * sin_yaw) + (x_2 * cos_yaw) + (x_3 * sin_pitch * sin_yaw); + Real const x_3_rot = (x_1 * sin_pitch) + (x_3 * cos_pitch); + + if (std::is_same::value) + { + return {round(x_1_rot), + round(x_2_rot), + round(x_3_rot)}; + } + else if (std::is_same::value) + { + return {x_1_rot, x_2_rot, x_3_rot}; + } + } + // ========================================================================= + + // ========================================================================= + /*! + * \brief Compute the dot product of a and b. + * + * \param[in] a1 The first element of a + * \param[in] a2 The second element of a + * \param[in] a3 The third element of a + * \param[in] b1 The first element of b + * \param[in] b2 The second element of b + * \param[in] b3 The third element of b + * + * \return Real The dot product of a and b + */ + inline __device__ __host__ Real dotProduct(Real const &a1, + Real const &a2, + Real const &a3, + Real const &b1, + Real const &b2, + Real const &b3) + {return a1*b1 + ((a2*b2) + (a3*b3));}; + // ========================================================================= + +}//math_utils diff --git a/src/utils/math_utilities_tests.cpp b/src/utils/math_utilities_tests.cpp new file mode 100644 index 000000000..38a2902d6 --- /dev/null +++ b/src/utils/math_utilities_tests.cpp @@ -0,0 +1,64 @@ +/*! 
+ * \file math_utilities_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Tests for the contents of math_utilities.h + * + */ + +// STL Includes +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../utils/testing_utilities.h" +#include "../utils/math_utilities.h" +#include "../global/global.h" + +// ============================================================================= +TEST(tALLRotateCoords, + CorrectInputExpectCorrectOutput) +{ + // Fiducial values + double const x_1 = 19.2497333410; + double const x_2 = 60.5197699003; + double const x_3 = 86.0613942621; + double const pitch = 1.239 * M_PI; + double const yaw = 0.171 * M_PI; + double const x_1_rot_fid = -31.565679455456568; + double const x_2_rot_fid = 14.745363873361605; + double const x_3_rot_fid = -76.05402749550727; + + auto [x_1_rot, x_2_rot, x_3_rot] = math_utils::rotateCoords(x_1, x_2, x_3, pitch, yaw); + + testingUtilities::checkResults<0>(x_1_rot_fid, x_1_rot, "x_1 rotated values"); + testingUtilities::checkResults<0>(x_2_rot_fid, x_2_rot, "x_2 rotated values"); + testingUtilities::checkResults<0>(x_3_rot_fid, x_3_rot, "x_3 rotated values"); +} +// ============================================================================= + +// ========================================================================= +/*! 
+ * \brief Test the math_utils::dotProduct function + * + */ +TEST(tALLDotProduct, + CorrectInputExpectCorrectOutput) +{ + std::vector a{21.503067766457753, 48.316634031589935, 81.12177317622657}, + b{38.504606872151484, 18.984145880030045, 89.52561861038686}; + + double const fiducialDotProduct = 9007.6941261535867; + + double testDotProduct; + + testDotProduct = math_utils::dotProduct(a.at(0), a.at(1), a.at(2), + b.at(0), b.at(1), b.at(2)); + + // Now check results + testingUtilities::checkResults(fiducialDotProduct, + testDotProduct, + "dot product"); +} +// ========================================================================= \ No newline at end of file diff --git a/src/utils/mhd_utilities.cpp b/src/utils/mhd_utilities.cpp deleted file mode 100644 index c7747830e..000000000 --- a/src/utils/mhd_utilities.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/*! - * \file mhd_utilities.cpp - * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the implementation of various utility functions for MHD - * - */ - -// STL Includes - -// External Includes - -// Local Includes -#include "../utils/mhd_utilities.h" - -namespace mhdUtils -{ - -} // end namespace mhdUtils \ No newline at end of file diff --git a/src/utils/mhd_utilities.cu b/src/utils/mhd_utilities.cu new file mode 100644 index 000000000..9e947b6c6 --- /dev/null +++ b/src/utils/mhd_utilities.cu @@ -0,0 +1,25 @@ +/*! + * \file mhd_utilities.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the implementation of various utility functions for MHD and + * for the various kernels, functions, and tools required for the 3D VL+CT MHD + * integrator. 
Due to the CUDA/HIP compiler requiring that device functions be + * directly accessible to the file they're used in most device functions will be + * implemented in the header file + * + */ + +// STL Includes + +// External Includes + +// Local Includes +#include "../utils/mhd_utilities.h" + +namespace mhd{ +namespace utils +{ + +}//utils + +} // end namespace mhd \ No newline at end of file diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index f28cbb400..ef64b9536 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -15,14 +15,17 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../utils/gpu.hpp" +#include "../utils/cuda_utilities.h" -/*! - * \brief Namespace for MHD utilities - * - */ -namespace mhdUtils -{ - namespace // Anonymouse namespace +namespace mhd{ +namespace utils{ + /*! + * \brief Namespace for functions required by functions within the mhd::utils + * namespace. Everything in this name space should be regarded as private + * but is made accesible for testing + * + */ + namespace _internal { // ===================================================================== /*! @@ -59,11 +62,12 @@ namespace mhdUtils return sqrt( (term1 + waveChoice * term2) / (2.0 * fmax(density, TINY_NUMBER)) ); } // ===================================================================== - }// Anonymouse namespace + }// mhd::utils::_internal namespace // ========================================================================= /*! - * \brief Compute the MHD energy in the cell + * \brief Compute the energy in a cell. 
If MHD is not defined then simply + * return the hydro only energy * * \param[in] pressure The gas pressure * \param[in] density The density @@ -87,9 +91,13 @@ namespace mhdUtils Real const &gamma) { // Compute and return energy - return (fmax(pressure,TINY_NUMBER)/(gamma - 1.)) - + 0.5 * density * (velocityX*velocityX + ((velocityY*velocityY) + (velocityZ*velocityZ))) - + 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); + Real energy = (fmax(pressure,TINY_NUMBER)/(gamma - 1.)) + + 0.5 * density * (velocityX*velocityX + ((velocityY*velocityY) + (velocityZ*velocityZ))); + #ifdef MHD + energy += 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); + #endif //MHD + + return energy; } // ========================================================================= @@ -157,6 +165,23 @@ namespace mhdUtils } // ========================================================================= + // ========================================================================= + /*! + * \brief Compute the magnetic energy + * + * \param[in] magneticX The magnetic field in the X-direction + * \param[in] magneticY The magnetic field in the Y-direction + * \param[in] magneticZ The magnetic field in the Z-direction + * \return Real The magnetic energy + */ + inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, + Real const &magneticY, + Real const &magneticZ) + { + return 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); + } + // ========================================================================= + // ========================================================================= /*! * \brief Compute the total MHD pressure. I.e. 
magnetic pressure + gas @@ -199,13 +224,13 @@ namespace mhdUtils Real const &gamma) { // Compute the sound speed - return _magnetosonicSpeed(density, - pressure, - magneticX, - magneticY, - magneticZ, - gamma, - 1.0); + return mhd::utils::_internal::_magnetosonicSpeed(density, + pressure, + magneticX, + magneticY, + magneticZ, + gamma, + 1.0); } // ========================================================================= @@ -229,13 +254,13 @@ namespace mhdUtils Real const &gamma) { // Compute the sound speed - return _magnetosonicSpeed(density, - pressure, - magneticX, - magneticY, - magneticZ, - gamma, - -1.0); + return mhd::utils::_internal::_magnetosonicSpeed(density, + pressure, + magneticX, + magneticY, + magneticZ, + gamma, + -1.0); } // ========================================================================= @@ -257,6 +282,7 @@ namespace mhdUtils // ========================================================================= // ========================================================================= + #ifdef MHD /*! * \brief Compute the cell centered average of the magnetic fields in a * given cell @@ -272,23 +298,41 @@ namespace mhdUtils * \param[out] avgBx The cell centered average magnetic field in the x-direction * \param[out] avgBy The cell centered average magnetic field in the y-direction * \param[out] avgBz The cell centered average magnetic field in the z-direction + * + * \return Real local struct with the X, Y, and Z cell centered magnetic + * fields. 
Intended to be called with structured binding like `auto [x, y, + * z] = mhd::utils::cellCenteredMagneticFields(*args*) */ - inline __host__ __device__ void cellCenteredMagneticFields(Real const *dev_conserved, + inline __host__ __device__ auto cellCenteredMagneticFields(Real const *dev_conserved, size_t const &id, size_t const &xid, size_t const &yid, size_t const &zid, size_t const &n_cells, size_t const &nx, - size_t const &ny, - Real &avgBx, - Real &avgBy, - Real &avgBz) + size_t const &ny) { - avgBx = 0.5 * (dev_conserved[(5+NSCALARS)*n_cells + id] + dev_conserved[(5+NSCALARS)*n_cells + ((xid-1) + yid*nx + zid*nx*ny)]); - avgBy = 0.5 * (dev_conserved[(6+NSCALARS)*n_cells + id] + dev_conserved[(6+NSCALARS)*n_cells + (xid + (yid-1)*nx + zid*nx*ny)]); - avgBz = 0.5 * (dev_conserved[(7+NSCALARS)*n_cells + id] + dev_conserved[(7+NSCALARS)*n_cells + (xid + yid*nx + (zid-1)*nx*ny)]); + // Ternary operator to check that no values outside of the magnetic field + // arrays are loaded. If the cell is on the edge that doesn't have magnetic + // fields on both sides then instead set the centered magnetic field to be + // equal to the magnetic field of the closest edge. T + Real avgBx = (xid > 0) ? + /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_x)*n_cells + id] + dev_conserved[(grid_enum::magnetic_x)*n_cells + cuda_utilities::compute1DIndex(xid-1, yid, zid, nx, ny)]): + /*if false*/ dev_conserved[(grid_enum::magnetic_x)*n_cells + id]; + Real avgBy = (yid > 0) ? + /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_y)*n_cells + id] + dev_conserved[(grid_enum::magnetic_y)*n_cells + cuda_utilities::compute1DIndex(xid, yid-1, zid, nx, ny)]): + /*if false*/ dev_conserved[(grid_enum::magnetic_y)*n_cells + id]; + Real avgBz = (zid > 0) ? 
+ /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_z)*n_cells + id] + dev_conserved[(grid_enum::magnetic_z)*n_cells + cuda_utilities::compute1DIndex(xid, yid, zid-1, nx, ny)]): + /*if false*/ dev_conserved[(grid_enum::magnetic_z)*n_cells + id]; + + struct returnStruct + { + Real x, y, z; + }; + return returnStruct{avgBx, avgBy, avgBz}; } + #endif // MHD // ========================================================================= - -} // end namespace mhdUtils \ No newline at end of file +} // end namespace mhd::utils +} // end namespace mhd \ No newline at end of file diff --git a/src/utils/mhd_utilities_tests.cpp b/src/utils/mhd_utilities_tests.cu similarity index 81% rename from src/utils/mhd_utilities_tests.cpp rename to src/utils/mhd_utilities_tests.cu index c5cbb25fb..83500c68f 100644 --- a/src/utils/mhd_utilities_tests.cpp +++ b/src/utils/mhd_utilities_tests.cu @@ -11,6 +11,7 @@ #include #include #include +#include // External Includes #include // Include GoogleTest and related libraries/headers @@ -45,12 +46,11 @@ namespace } // ============================================================================= - // ============================================================================= -// Tests for the mhdUtils::computeEnergy function +// Tests for the mhd::utils::computeEnergy function // ============================================================================= /*! - * \brief Test the mhdUtils::computeEnergy function with the standard set of + * \brief Test the mhd::utils::computeEnergy function with the standard set of * parameters * */ @@ -64,7 +64,7 @@ TEST(tMHDComputeEnergy, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testEnergy = mhdUtils::computeEnergy(parameters.pressureGas.at(i), + Real testEnergy = mhd::utils::computeEnergy(parameters.pressureGas.at(i), parameters.density.at(i), parameters.velocityX.at(i), parameters.velocityY.at(i), @@ -81,7 +81,7 @@ TEST(tMHDComputeEnergy, } /*! 
- * \brief Test the mhdUtils::computeEnergy function with a the standard set of + * \brief Test the mhd::utils::computeEnergy function with a the standard set of * parameters except pressure is now negative * */ @@ -95,7 +95,7 @@ TEST(tMHDComputeEnergy, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testEnergy = mhdUtils::computeEnergy(-parameters.pressureGas.at(i), + Real testEnergy = mhd::utils::computeEnergy(-parameters.pressureGas.at(i), parameters.density.at(i), parameters.velocityX.at(i), parameters.velocityY.at(i), @@ -111,14 +111,14 @@ TEST(tMHDComputeEnergy, } } // ============================================================================= -// End of tests for the mhdUtils::computeEnergy function +// End of tests for the mhd::utils::computeEnergy function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::computeGasPressure function +// Tests for the mhd::utils::computeGasPressure function // ============================================================================= /*! - * \brief Test the mhdUtils::computeGasPressure function with the standard set of + * \brief Test the mhd::utils::computeGasPressure function with the standard set of * parameters. Energy has been increased to avoid negative pressures * */ @@ -133,7 +133,7 @@ TEST(tMHDComputeGasPressure, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testGasPressure = mhdUtils::computeGasPressure(energyMultiplier.at(i) * parameters.energy.at(i), + Real testGasPressure = mhd::utils::computeGasPressure(energyMultiplier.at(i) * parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), parameters.momentumY.at(i), @@ -150,7 +150,7 @@ TEST(tMHDComputeGasPressure, } /*! 
- * \brief Test the mhdUtils::computeGasPressure function with a the standard set + * \brief Test the mhd::utils::computeGasPressure function with a the standard set * of parameters which produce negative pressures * */ @@ -161,7 +161,7 @@ TEST(tMHDComputeGasPressure, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testGasPressure = mhdUtils::computeGasPressure(parameters.energy.at(i), + Real testGasPressure = mhd::utils::computeGasPressure(parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), parameters.momentumY.at(i), @@ -178,15 +178,15 @@ TEST(tMHDComputeGasPressure, } } // ============================================================================= -// End of tests for the mhdUtils::computeGasPressure function +// End of tests for the mhd::utils::computeGasPressure function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::computeThermalEnergy function +// Tests for the mhd::utils::computeThermalEnergy function // ============================================================================= /*! - * \brief Test the mhdUtils::computeThermalEnergy function with the standard set + * \brief Test the mhd::utils::computeThermalEnergy function with the standard set * of parameters. 
* */ @@ -201,7 +201,7 @@ TEST(tMHDComputeThermalEnergy, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testGasPressure = mhdUtils::computeThermalEnergy(energyMultiplier.at(i) * parameters.energy.at(i), + Real testGasPressure = mhd::utils::computeThermalEnergy(energyMultiplier.at(i) * parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), parameters.momentumY.at(i), @@ -217,14 +217,46 @@ TEST(tMHDComputeThermalEnergy, } } // ============================================================================= -// End of tests for the mhdUtils::computeThermalEnergyfunction +// End of tests for the mhd::utils::computeThermalEnergy function +// ============================================================================= + +// ============================================================================= +// Tests for the mhd::utils::computeMagneticEnergy function +// ============================================================================= +/*! + * \brief Test the mhd::utils::computeMagneticEnergy function with the standard + * set of parameters. 
+ * + */ +TEST(tMHDcomputeMagneticEnergy, + CorrectInputExpectCorrectOutput) +{ + testParams parameters; + std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; + std::vector fiducialEnergy{0.0, + 805356.08013056568, + 6.7079331637514162e+201}; + + for (size_t i = 0; i < parameters.names.size(); i++) + { + Real testMagneticEnergy = mhd::utils::computeMagneticEnergy(parameters.magneticX.at(i), + parameters.magneticY.at(i), + parameters.magneticZ.at(i)); + + testingUtilities::checkResults(fiducialEnergy.at(i), + testMagneticEnergy, + parameters.names.at(i)); + } +} +// ============================================================================= +// End of tests for the mhd::utils::computeMagneticEnergy function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::computeTotalPressure function +// Tests for the mhd::utils::computeTotalPressure function // ============================================================================= /*! - * \brief Test the mhdUtils::computeTotalPressure function with the standard set + * \brief Test the mhd::utils::computeTotalPressure function with the standard set * of parameters. * */ @@ -238,7 +270,7 @@ TEST(tMHDComputeTotalPressure, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testTotalPressure = mhdUtils::computeTotalPressure(parameters.pressureGas.at(i), + Real testTotalPressure = mhd::utils::computeTotalPressure(parameters.pressureGas.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i)); @@ -250,7 +282,7 @@ TEST(tMHDComputeTotalPressure, } /*! - * \brief Test the mhdUtils::computeTotalPressure function with a the standard + * \brief Test the mhd::utils::computeTotalPressure function with a the standard * set of parameters. 
Gas pressure has been multiplied and made negative to * generate negative total pressures * @@ -263,7 +295,7 @@ TEST(tMHDComputeTotalPressure, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testTotalPressure = mhdUtils::computeTotalPressure(pressureMultiplier.at(i) * parameters.pressureGas.at(i), + Real testTotalPressure = mhd::utils::computeTotalPressure(pressureMultiplier.at(i) * parameters.pressureGas.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i)); @@ -275,14 +307,14 @@ TEST(tMHDComputeTotalPressure, } } // ============================================================================= -// End of tests for the mhdUtils::computeTotalPressure function +// End of tests for the mhd::utils::computeTotalPressure function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::fastMagnetosonicSpeed function +// Tests for the mhd::utils::fastMagnetosonicSpeed function // ============================================================================= /*! - * \brief Test the mhdUtils::fastMagnetosonicSpeed function with the standard + * \brief Test the mhd::utils::fastMagnetosonicSpeed function with the standard * set of parameters. All values are reduced by 1e-25 in the large number case * to avoid overflow * @@ -298,7 +330,7 @@ TEST(tMHDFastMagnetosonicSpeed, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testFastMagnetosonicSpeed = mhdUtils::fastMagnetosonicSpeed( + Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( coef.at(i)*parameters.density.at(i), coef.at(i)*parameters.pressureGas.at(i), coef.at(i)*parameters.magneticX.at(i), @@ -313,7 +345,7 @@ TEST(tMHDFastMagnetosonicSpeed, } /*! 
- * \brief Test the mhdUtils::fastMagnetosonicSpeed function with the standard + * \brief Test the mhd::utils::fastMagnetosonicSpeed function with the standard * set of parameters, density is negative. All values are reduced by 1e-25 in * the large number case to avoid overflow. * @@ -329,7 +361,7 @@ TEST(tMHDFastMagnetosonicSpeed, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testFastMagnetosonicSpeed = mhdUtils::fastMagnetosonicSpeed( + Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( -coef.at(i)*parameters.density.at(i), coef.at(i)*parameters.pressureGas.at(i), coef.at(i)*parameters.magneticX.at(i), @@ -343,14 +375,14 @@ TEST(tMHDFastMagnetosonicSpeed, } } // ============================================================================= -// End of tests for the mhdUtils::fastMagnetosonicSpeed function +// End of tests for the mhd::utils::fastMagnetosonicSpeed function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::slowMagnetosonicSpeed function +// Tests for the mhd::utils::slowMagnetosonicSpeed function // ============================================================================= /*! - * \brief Test the mhdUtils::slowMagnetosonicSpeed function with the standard + * \brief Test the mhd::utils::slowMagnetosonicSpeed function with the standard * set of parameters. All values are reduced by 1e-25 in the large number case * to avoid overflow * @@ -367,7 +399,7 @@ TEST(tMHDSlowMagnetosonicSpeed, for (size_t i = 2; i < parameters.names.size(); i++) { - Real testSlowMagnetosonicSpeed = mhdUtils::slowMagnetosonicSpeed( + Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, @@ -382,7 +414,7 @@ TEST(tMHDSlowMagnetosonicSpeed, } /*! 
- * \brief Test the mhdUtils::slowMagnetosonicSpeed function with the standard + * \brief Test the mhd::utils::slowMagnetosonicSpeed function with the standard * set of parameters, density is negative. All values are reduced by 1e-25 in * the large number case to avoid overflow. * @@ -399,7 +431,7 @@ TEST(tMHDSlowMagnetosonicSpeed, for (size_t i = 2; i < parameters.names.size(); i++) { - Real testSlowMagnetosonicSpeed = mhdUtils::slowMagnetosonicSpeed( + Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( -parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, @@ -413,14 +445,14 @@ TEST(tMHDSlowMagnetosonicSpeed, } } // ============================================================================= -// End of tests for the mhdUtils::slowMagnetosonicSpeed function +// End of tests for the mhd::utils::slowMagnetosonicSpeed function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::alfvenSpeed function +// Tests for the mhd::utils::alfvenSpeed function // ============================================================================= /*! - * \brief Test the mhdUtils::alfvenSpeed function with the standard set of + * \brief Test the mhd::utils::alfvenSpeed function with the standard set of * parameters. * */ @@ -434,7 +466,7 @@ TEST(tMHDAlfvenSpeed, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testAlfvenSpeed = mhdUtils::alfvenSpeed(parameters.magneticX.at(i), + Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), parameters.density.at(i)); testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), @@ -444,7 +476,7 @@ TEST(tMHDAlfvenSpeed, } /*! 
- * \brief Test the mhdUtils::alfvenSpeed function with the standard set of + * \brief Test the mhd::utils::alfvenSpeed function with the standard set of * parameters except density is negative * */ @@ -458,7 +490,7 @@ TEST(tMHDAlfvenSpeed, for (size_t i = 0; i < parameters.names.size(); i++) { - Real testAlfvenSpeed = mhdUtils::alfvenSpeed(parameters.magneticX.at(i), + Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), -parameters.density.at(i)); testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), @@ -467,12 +499,13 @@ TEST(tMHDAlfvenSpeed, } } // ============================================================================= -// End of tests for the mhdUtils::alfvenSpeed function +// End of tests for the mhd::utils::alfvenSpeed function // ============================================================================= // ============================================================================= -// Tests for the mhdUtils::cellCenteredMagneticFields function +// Tests for the mhd::utils::cellCenteredMagneticFields function // ============================================================================= +#ifdef MHD TEST(tMHDCellCenteredMagneticFields, CorrectInputExpectCorrectOutput) { @@ -484,7 +517,7 @@ TEST(tMHDCellCenteredMagneticFields, size_t const n_cells = std::pow(5,3); // Make sure the vector is large enough that the locations where the // magnetic field would be in the real grid are filled - std::vector testGrid(n_cells * (8+NSCALARS)); + std::vector testGrid(n_cells * (grid_enum::num_fields)); // Populate the grid with values where testGrid.at(i) = double(i). 
The // values chosen aren't that important, just that every cell has a unique // value @@ -494,16 +527,16 @@ TEST(tMHDCellCenteredMagneticFields, double const fiducialAvgBx = 637.5, fiducialAvgBy = 761.5, fiducialAvgBz = 883.5; - double testAvgBx, testAvgBy, testAvgBz; // Call the function to test - mhdUtils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny, testAvgBx, testAvgBy, testAvgBz); + auto [testAvgBx, testAvgBy, testAvgBz] = mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny); // Check the results testingUtilities::checkResults(fiducialAvgBx, testAvgBx, "cell centered Bx value"); testingUtilities::checkResults(fiducialAvgBy, testAvgBy, "cell centered By value"); testingUtilities::checkResults(fiducialAvgBz, testAvgBz, "cell centered Bz value"); } +#endif // MHD // ============================================================================= -// End of tests for the mhdUtils::cellCenteredMagneticFields function +// End of tests for the mhd::utils::cellCenteredMagneticFields function // ============================================================================= diff --git a/src/utils/reduction_utilities.cu b/src/utils/reduction_utilities.cu index 820f27826..65933e42f 100644 --- a/src/utils/reduction_utilities.cu +++ b/src/utils/reduction_utilities.cu @@ -42,19 +42,5 @@ gridReduceMax(maxVal, out); } // ===================================================================== - - // ===================================================================== - void reductionLaunchParams(uint &numBlocks, uint &threadsPerBlock, uint const &deviceNum) - { - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, deviceNum); - - // Divide the total number of allowed threads by the number of - // threads per block - threadsPerBlock = prop.maxThreadsPerBlock; - numBlocks = (prop.maxThreadsPerMultiProcessor * prop.multiProcessorCount) - / threadsPerBlock; - } - // 
===================================================================== }//reduction_utilities #endif //CUDA \ No newline at end of file diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h index 6935d481b..39089ac2e 100644 --- a/src/utils/reduction_utilities.h +++ b/src/utils/reduction_utilities.h @@ -8,7 +8,7 @@ #pragma once // STL Includes -#include +#include // External Includes @@ -78,7 +78,80 @@ } // ===================================================================== + #ifndef O_HIP // ===================================================================== + // This section handles the atomics. It is complicated because CUDA + // doesn't currently support atomics with non-integral types. + // This code is taken from + // https://github.com/rapidsai/cuml/blob/dc14361ba11c41f7a4e1e6a3625bbadd0f52daf7/cpp/src_prims/stats/minmax.cuh + // with slight tweaks for our use case. + // ===================================================================== + /*! + * \brief Do a device side bit cast + * + * \tparam To The output type + * \tparam From The input type + * \param from The input value + * \return To The bit cast version of From as type To + */ + template + __device__ constexpr To bit_cast(const From& from) noexcept + { + // TODO: replace with `std::bitcast` once we adopt C++20 or libcu++ adds it + To to{}; + static_assert(sizeof(To) == sizeof(From)); + memcpy(&to, &from, sizeof(To)); + return to; + } + + /*! + * \brief Encode a float as an int + * + * \param val The float to encode + * \return int The encoded int + */ + inline __device__ int encode(float val) + { + int i = bit_cast(val); + return i >= 0 ? i : (1 << 31) | ~i; + } + + /*! + * \brief Encode a double as a long long int + * + * \param val The double to encode + * \return long long The encoded long long int + */ + inline __device__ long long encode(double val) + { + std::int64_t i = bit_cast(val); + return i >= 0 ? i : (1ULL << 63) | ~i; + } + + /*! 
+ * \brief Decodes an int as a float + * + * \param val The int to decode + * \return float The decoded float + */ + inline __device__ float decode(int val) + { + if (val < 0) val = (1 << 31) | ~val; + return bit_cast(val); + } + + /*! + * \brief Decodes a long long int as a double + * + * \param val The long long to decode + * \return double The decoded double + */ + inline __device__ double decode(long long val) + { + if (val < 0) val = (1ULL << 63) | ~val; + return bit_cast(val); + } + #endif //O_HIP /*! * \brief Perform an atomic reduction to find the maximum value of `val` * @@ -88,27 +161,71 @@ * the grid. Typically this should be a partial reduction that has * already been reduced to the block level */ - __inline__ __device__ double atomicMax_double(double* address, double val) + inline __device__ float atomicMaxBits(float* address, float val) { - unsigned long long int* address_as_ull = (unsigned long long int*) address; - unsigned long long int old = *address_as_ull, assumed; - // Explanation of loop here: - // https://stackoverflow.com/questions/16077464/atomicadd-for-double-on-gpu - // The loop is to make sure the value at address doesn't change - // between the load at the atomic since the entire operation isn't - // atomic - - // While it appears that this could result in many times more atomic - // operations than required, in practice it's only a handful of - // extra operation even in the worst case. Running with 16,000 - // blocks gives ~8-37 atomics after brief testing - do { - assumed = old; - old = atomicCAS(address_as_ull, - assumed, - __double_as_longlong(fmax(__longlong_as_double(assumed),val))); - } while (assumed != old); - return __longlong_as_double(old); + #ifdef O_HIP + return atomicMax(address, val); + #else //O_HIP + int old = atomicMax((int*)address, encode(val)); + return decode(old); + #endif //O_HIP + } + + /*! 
+ * \brief Perform an atomic reduction to find the maximum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the maximum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ + inline __device__ double atomicMaxBits(double* address, double val) + { + #ifdef O_HIP + return atomicMax(address, val); + #else //O_HIP + long long old = atomicMax((long long*)address, encode(val)); + return decode(old); + #endif //O_HIP + } + + /*! + * \brief Perform an atomic reduction to find the minimum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the minimum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ + inline __device__ float atomicMinBits(float* address, float val) + { + #ifdef O_HIP + return atomicMin(address, val); + #else //O_HIP + int old = atomicMin((int*)address, encode(val)); + return decode(old); + #endif //O_HIP + } + + /*! + * \brief Perform an atomic reduction to find the minimum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the minimum of across + * the grid. 
Typically this should be a partial reduction that has + * already been reduced to the block level + */ + inline __device__ double atomicMinBits(double* address, double val) + { + #ifdef O_HIP + return atomicMin(address, val); + #else //O_HIP + long long old = atomicMin((long long*)address, encode(val)); + return decode(old); + #endif //O_HIP } // ===================================================================== @@ -119,6 +236,10 @@ * before the kernel launch that uses this function to avoid any * potential race condition; the `cuda_utilities::setScalarDeviceMemory` * function exists for this purpose. + * of `val`. Note that the value of `out` should be set appropriately + * before the kernel launch that uses this function to avoid any + * potential race condition; the `cuda_utilities::setScalarDeviceMemory` + * function exists for this purpose. * * \details This function can perform a reduction to find the maximum of * the thread local variable `val` across the entire grid. It relies on a @@ -128,7 +249,7 @@ * by using as many threads per block as possible and as few blocks as * possible since each block has to perform an atomic operation. To * accomplish this it is reccommened that you use the - * `reductionLaunchParams` functions to get the optimal number of blocks + * `AutomaticLaunchParams` functions to get the optimal number of blocks * and threads per block to launch rather than relying on Cholla defaults * and then within the kernel using a grid-stride loop to make sure the * kernel works with any combination of threads and blocks. 
Note that @@ -146,18 +267,22 @@ */ __inline__ __device__ void gridReduceMax(Real val, Real* out) { - // __syncthreads(); // Wait for all threads to calculate val; // Reduce the entire block in parallel val = blockReduceMax(val); // Write block level reduced value to the output scalar atomically - if (threadIdx.x == 0) atomicMax_double(out, val); + if (threadIdx.x == 0) atomicMaxBits(out, val); } // ===================================================================== // ===================================================================== /*! + * \brief Find the maximum value in the array. Make sure to initialize + * `out` correctly before using this kernel; the + * `cuda_utilities::setScalarDeviceMemory` function exists for this + * purpose. If `in` and `out` are the same array that's ok, all the + * loads are completed before the overwrite occurs. * \brief Find the maximum value in the array. Make sure to initialize * `out` correctly before using this kernel; the * `cuda_utilities::setScalarDeviceMemory` function exists for this @@ -171,23 +296,5 @@ */ __global__ void kernelReduceMax(Real *in, Real* out, size_t N); // ===================================================================== - - // ===================================================================== - /*! - * \brief Determine the optimal number of blocks and threads per block to - * use when launching a reduction kernel - * - * \param[out] numBlocks The maximum number of blocks that are - * scheduleable by the device in use when each block has the maximum - * number of threads - * \param[out] threadsPerBlock The maximum threads per block supported by - * the device in use - * \param[in] deviceNum optional: which device is being targeted. 
- * Defaults to zero - */ - void reductionLaunchParams(uint &numBlocks, - uint &threadsPerBlock, - uint const &deviceNum=0); - // ===================================================================== } // namespace reduction_utilities #endif //CUDA diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu index 2314b33be..64613cc5b 100644 --- a/src/utils/reduction_utilities_tests.cu +++ b/src/utils/reduction_utilities_tests.cu @@ -17,6 +17,8 @@ // Local Includes #include "../utils/testing_utilities.h" #include "../utils/reduction_utilities.h" +#include "../utils/cuda_utilities.h" +#include "../utils/DeviceVector.h" #include "../global/global.h" @@ -28,8 +30,7 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) { // Launch parameters // ================= - uint numBlocks, threadsPerBlock; - reduction_utilities::reductionLaunchParams(numBlocks, threadsPerBlock); + cuda_utilities::AutomaticLaunchParams static const launchParams(reduction_utilities::kernelReduceMax); // Grid Parameters & testing parameters // ==================================== @@ -37,7 +38,6 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) size_t const size = std::pow(gridSize, 3);; Real const maxValue = 4; std::vector host_grid(size); - Real host_max = -DBL_MAX; // Fill grid with random values and assign maximum value std::mt19937 prng(1); @@ -52,44 +52,22 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) // Allocating and copying to device // ================================ - Real *dev_grid; - CudaSafeCall(cudaMalloc(&dev_grid, host_grid.size() * sizeof(Real))); - CudaSafeCall(cudaMemcpy(dev_grid, host_grid.data(), host_grid.size() * sizeof(Real), cudaMemcpyHostToDevice)); + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); - Real *dev_max_array; - CudaSafeCall(cudaMalloc(&dev_max_array, numBlocks*sizeof(Real))); - // Sets all bytes to 0. 
- cudaMemset(dev_max_array,0,numBlocks*sizeof(Real)); - - Real host_max_array[numBlocks]; - //Real *host_max_array = (Real *) malloc(numBlocks*sizeof(Real)); - //CudaSafeCall( cudaHostAlloc(&host_max_array, numBlocks*sizeof(Real), cudaHostAllocDefault) ); + cuda_utilities::DeviceVector static dev_max(1); + dev_max.assign(std::numeric_limits::lowest()); // Do the reduction // ================ - hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, numBlocks, threadsPerBlock, 0, 0, dev_grid, dev_max_array, host_grid.size()); + hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, + launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + dev_grid.data(), dev_max.data(), host_grid.size()); CudaCheckError(); - - // Copy back and sync - // ================== - CudaSafeCall(cudaMemcpy(&host_max_array, dev_max_array, numBlocks*sizeof(Real), cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); - - for (int i = 0; i < numBlocks; i++) - { - host_max = fmax(host_max,host_max_array[i]); - } - - //free(host_max_array); - - cudaFree(dev_max_array); - - cudaFree(dev_grid); - // Perform comparison - testingUtilities::checkResults(maxValue, host_max, "maximum value found"); + testingUtilities::checkResults(maxValue, dev_max.at(0), "maximum value found"); } // ============================================================================= // Tests for divergence max reduction diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 9b8bee948..6035b68b5 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -80,52 +80,6 @@ namespace testingUtilities } // ========================================================================= - // ========================================================================= - void checkResults(double fiducialNumber, - double testNumber, - std::string outString, - double fixedEpsilon, - int ulpsEpsilon) - { - // Check for equality and if not equal return difference - double absoluteDiff; 
- int64_t ulpsDiff; - bool areEqual; - - if ((fixedEpsilon < 0) and (ulpsEpsilon < 0)) - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff); - } - else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0)) - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff, - fixedEpsilon); - } - else - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff, - fixedEpsilon, - ulpsEpsilon); - } - - EXPECT_TRUE(areEqual) - << "Difference in " << outString << std::endl - << "The fiducial value is: " << fiducialNumber << std::endl - << "The test value is: " << testNumber << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; - } - // ========================================================================= - void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_value, double fid_value, double fixedEpsilon=5.0E-12) { @@ -139,24 +93,24 @@ namespace testingUtilities outString += k; outString += "]"; - checkResults(fid_value,test_value,outString,fixedEpsilon); + ASSERT_NO_FATAL_FAILURE(checkResults<1>(fid_value,test_value,outString,fixedEpsilon)); } void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value) { std::vector testDims(3,1); std::vector testData = testObject.loadTestFieldData(dataSetName,testDims); for (size_t i = 0; i < testDims[0]; i++) + { + for (size_t j = 0; j < testDims[1]; j++) { - for (size_t j = 0; j < testDims[1]; j++) - { - for (size_t k = 0; k < testDims[2]; k++) - { - size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - - wrapperEqual(i,j,k,dataSetName,testData.at(index),value); - } - } + for (size_t k = 0; k < testDims[2]; k++) + { + size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + + 
ASSERT_NO_FATAL_FAILURE(wrapperEqual(i,j,k,dataSetName,testData.at(index),value)); + } } + } } void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, @@ -166,17 +120,17 @@ namespace testingUtilities std::vector testDims(3,1); std::vector testData = testObject.loadTestFieldData(dataSetName,testDims); for (size_t i = 0; i < testDims[0]; i++) - { - for (size_t j = 0; j < testDims[1]; j++) - { - for (size_t k = 0; k < testDims[2]; k++) - { - double value = constant + amplitude*std::sin(kx*i+ky*j+kz*k+phase); - size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - wrapperEqual(i,j,k,dataSetName,testData.at(index),value,tolerance); - } - } - } + { + for (size_t j = 0; j < testDims[1]; j++) + { + for (size_t k = 0; k < testDims[2]; k++) + { + double value = constant + amplitude*std::sin(kx*i+ky*j+kz*k+phase); + size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + ASSERT_NO_FATAL_FAILURE(wrapperEqual(i,j,k,dataSetName,testData.at(index),value,tolerance)); + } + } + } } diff --git a/src/utils/testing_utilities.h b/src/utils/testing_utilities.h index 927a61f28..b98780247 100644 --- a/src/utils/testing_utilities.h +++ b/src/utils/testing_utilities.h @@ -10,6 +10,10 @@ // STL includes #include +#include +#include +#include + #include "../system_tests/system_tester.h" // provide systemTest class // ============================================================================= @@ -106,6 +110,8 @@ namespace testingUtilities * \brief A simple function to compare two doubles with the nearlyEqualDbl * function, perform a GTest assert on the result, and print out the values * + * \tparam checkType The type of GTest assertion to use. 
"0" for and + * "EXPECT" and "1" for an "ASSERT" * \param[in] fiducialNumber The fiducial number to test against * \param[in] testNumber The unverified number to test * \param[in] outString A string to be printed in the first line of the output @@ -115,11 +121,66 @@ namespace testingUtilities * \param[in] ulpsEpsilon The ULP epsilon to use in the comparison. Negative * values are ignored and default behaviour is used */ + template void checkResults(double fiducialNumber, double testNumber, std::string outString, double fixedEpsilon = -999, - int ulpsEpsilon = -999); + int ulpsEpsilon = -999) + { + // Check for equality and if not equal return difference + double absoluteDiff; + int64_t ulpsDiff; + bool areEqual; + + if ((fixedEpsilon < 0) and (ulpsEpsilon < 0)) + { + areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, + testNumber, + absoluteDiff, + ulpsDiff); + } + else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0)) + { + areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, + testNumber, + absoluteDiff, + ulpsDiff, + fixedEpsilon); + } + else + { + areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, + testNumber, + absoluteDiff, + ulpsDiff, + fixedEpsilon, + ulpsEpsilon); + } + + std::stringstream outputMessage; + outputMessage << std::setprecision(std::numeric_limits::max_digits10) + << "Difference in " << outString << std::endl + << "The fiducial value is: " << fiducialNumber << std::endl + << "The test value is: " << testNumber << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; + + if (checkType == 0) + { + EXPECT_TRUE(areEqual) << outputMessage.str(); + } + else if (checkType == 1) + { + ASSERT_TRUE(areEqual) << outputMessage.str(); + } + else + { + throw std::runtime_error("Incorrect template argument passed to " + "checkResults. 
Options are 0 and 1 but " + + std::to_string(checkType) + " was passed"); + } + } // ========================================================================= // =========================================================================