.testing: Add WORK_SPACE make variable to control scratch space

- Added the Make variable "WORK_SPACE", which defaults to ".". It controls where the work/ and results/ directories used by the "test" target in .testing/Makefile are located.
- Use WORK_SPACE=/lustre/f2/scratch/$USER/runner/$CI_RUNNER_ID in the job script so that the pipeline still works if the runner is later moved to a disk that is read-only from the compute nodes.
NOAA-GFDL · Jan 13, 2023 · 91f3288 · 91f3288
1 parent 0f5e6ca
commit 91f3288
Show file tree

Hide file tree

Showing 2 changed files with 69 additions and 63 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -182,9 +182,9 @@ actions:gnu:
     - cd .testing
     - module unload PrgEnv-pgi PrgEnv-intel PrgEnv-gnu darshan ; module load PrgEnv-gnu ; module unload netcdf gcc ; module load gcc/7.3.0 cray-hdf5 cray-netcdf
     - make -s -j
-    - make preproc-compile -s -j
+    - MPIRUN= make preproc -s -j
     - echo -e "\e[0Ksection_end:`date +%s`:compile\r\e[0K"
-    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" test -s -j') > job.sh
+    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" WORK_SPACE=/lustre/f2/scratch/$USER/runner/$CI_RUNNER_ID test -s -j') > job.sh
     - sbatch --clusters=c3,c4 --nodes=5 --time=0:05:00 --account=gfdl_o --qos=debug --job-name=MOM6.gnu.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 )
     - make test.summary
 
@@ -202,9 +202,9 @@ actions:intel:
     - cd .testing
     - module unload PrgEnv-pgi PrgEnv-intel PrgEnv-gnu darshan; module load PrgEnv-intel; module unload netcdf intel; module load intel/18.0.6.288 cray-hdf5 cray-netcdf
     - make -s -j
-    - make preproc-compile -s -j
+    - MPIRUN= make preproc -s -j
     - echo -e "\e[0Ksection_end:`date +%s`:compile\r\e[0K"
-    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" test -s -j') > job.sh
+    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" WORK_SPACE=/lustre/f2/scratch/$USER/runner/$CI_RUNNER_ID test -s -j') > job.sh
     - sbatch --clusters=c3,c4 --nodes=5 --time=0:05:00 --account=gfdl_o --qos=debug --job-name=MOM6.intel.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 )
     - make test.summary
 

diff --git a/.testing/Makefile b/.testing/Makefile
@@ -57,6 +57,9 @@
 #   MOM_TARGET_LOCAL_BRANCH     Target branch name
 # (NOTE: These would typically be configured by a CI.)
 #
+# Paths for stages:
+#   WORK_SPACE   Location to place work/ and results/ directories (i.e. where to run the model)
+#
 #----
 
 # TODO: POSIX shell compatibility
@@ -129,6 +132,8 @@ CONFIGS ?= $(wildcard tc*)
 TESTS ?= grid layout rotate restart openmp nan $(foreach d,$(DIMS),dim.$(d))
 DIMS ?= t l h z q r
 
+# Default is to place work/ and results/ in current directory
+WORK_SPACE ?= .
 
 #---
 # Test configuration
@@ -408,11 +413,11 @@ endef
 $(foreach d,$(DIMS),$(eval $(call TEST_DIM_RULE,$(d))))
 
 .PHONY: run.symmetric run.asymmetric run.nans run.openmp run.cov
-run.symmetric: $(foreach c,$(CONFIGS),work/$(c)/symmetric/ocean.stats)
-run.asymmetric: $(foreach c,$(filter-out tc3,$(CONFIGS)),$(CONFIGS),work/$(c)/asymmetric/ocean.stats)
-run.nan: $(foreach c,$(CONFIGS),work/$(c)/nan/ocean.stats)
-run.openmp: $(foreach c,$(CONFIGS),work/$(c)/openmp/ocean.stats)
-run.cov: $(foreach c,$(CONFIGS),work/$(c)/cov/ocean.stats)
+run.symmetric: $(foreach c,$(CONFIGS),$(WORK_SPACE)/work/$(c)/symmetric/ocean.stats)
+run.asymmetric: $(foreach c,$(filter-out tc3,$(CONFIGS)),$(CONFIGS),$(WORK_SPACE)/work/$(c)/asymmetric/ocean.stats)
+run.nan: $(foreach c,$(CONFIGS),$(WORK_SPACE)/work/$(c)/nan/ocean.stats)
+run.openmp: $(foreach c,$(CONFIGS),$(WORK_SPACE)/work/$(c)/openmp/ocean.stats)
+run.cov: $(foreach c,$(CONFIGS),$(WORK_SPACE)/work/$(c)/cov/ocean.stats)
 
 # Configuration test rules
 # $(1): Configuration name (tc1, tc2, &c.)
@@ -444,21 +449,21 @@ FAIL = ${RED}FAIL${RESET}
 # $(2): Test type (grid, layout, &c.)
 # $(3): Comparison targets (symmetric asymmetric, symmetric layout, &c.)
 define CMP_RULE
-.PRECIOUS: $(foreach b,$(3),work/$(1)/$(b)/ocean.stats)
-$(1).$(2): $(foreach b,$(3),work/$(1)/$(b)/ocean.stats)
-	@test "$$(shell ls -A results/$(1) 2>/dev/null)" || rm -rf results/$(1)
+.PRECIOUS: $(foreach b,$(3),$(WORK_SPACE)/work/$(1)/$(b)/ocean.stats)
+$(1).$(2): $(foreach b,$(3),$(WORK_SPACE)/work/$(1)/$(b)/ocean.stats)
+	@test "$$(shell ls -A $(WORK_SPACE)/results/$(1) 2>/dev/null)" || rm -rf $(WORK_SPACE)/results/$(1)
 	@cmp $$^ || !( \
-	  mkdir -p results/$(1); \
-	  (diff $$^ | tee results/$(1)/ocean.stats.$(2).diff | head -n 20) ; \
+	  mkdir -p $(WORK_SPACE)/results/$(1); \
+	  (diff $$^ | tee $(WORK_SPACE)/results/$(1)/ocean.stats.$(2).diff | head -n 20) ; \
 	  echo -e "$(FAIL): Solutions $(1).$(2) have changed." \
 	)
 	@echo -e "$(PASS): Solutions $(1).$(2) agree."
 
-.PRECIOUS: $(foreach b,$(3),work/$(1)/$(b)/chksum_diag)
-$(1).$(2).diag: $(foreach b,$(3),work/$(1)/$(b)/chksum_diag)
+.PRECIOUS: $(foreach b,$(3),$(WORK_SPACE)/work/$(1)/$(b)/chksum_diag)
+$(1).$(2).diag: $(foreach b,$(3),$(WORK_SPACE)/work/$(1)/$(b)/chksum_diag)
 	@cmp $$^ || !( \
-	  mkdir -p results/$(1); \
-	  (diff $$^ | tee results/$(1)/chksum_diag.$(2).diff | head -n 20) ; \
+	  mkdir -p $(WORK_SPACE)/results/$(1); \
+	  (diff $$^ | tee $(WORK_SPACE)/results/$(1)/chksum_diag.$(2).diff | head -n 20) ; \
 	  echo -e "$(FAIL): Diagnostics $(1).$(2).diag have changed." \
 	)
 	@echo -e "$(PASS): Diagnostics $(1).$(2).diag agree."
@@ -478,36 +483,37 @@ $(foreach c,$(CONFIGS),$(eval $(call CONFIG_DIM_RULE,$(c))))
 
 # Custom comparison rules
 
+
 # Restart tests only compare the final stat record
-.PRECIOUS: $(foreach b,symmetric restart target,work/%/$(b)/ocean.stats)
-%.restart: $(foreach b,symmetric restart,work/%/$(b)/ocean.stats)
-	@test "$(shell ls -A results/$* 2>/dev/null)" || rm -rf results/$*
+.PRECIOUS: $(foreach b,symmetric restart target,$(WORK_SPACE)/work/%/$(b)/ocean.stats)
+%.restart: $(foreach b,symmetric restart,$(WORK_SPACE)/work/%/$(b)/ocean.stats)
+	@test "$(shell ls -A $(WORK_SPACE)/results/$* 2>/dev/null)" || rm -rf $(WORK_SPACE)/results/$*
 	@cmp $(foreach f,$^,<(tr -s ' ' < $(f) | cut -d ' ' -f3- | tail -n 1)) \
 	  || !( \
-	    mkdir -p results/$*; \
-	    (diff $^ | tee results/$*/chksum_diag.restart.diff | head -n 20) ; \
+	    mkdir -p $(WORK_SPACE)/results/$*; \
+	    (diff $^ | tee $(WORK_SPACE)/results/$*/chksum_diag.restart.diff | head -n 20) ; \
 	    echo -e "$(FAIL): Solutions $*.restart have changed." \
 	  )
 	@echo -e "$(PASS): Solutions $*.restart agree."
 
 # TODO: chksum_diag parsing of restart files
 
 # stats rule is unchanged, but we cannot use CMP_RULE to generate it.
-%.regression: $(foreach b,symmetric target,work/%/$(b)/ocean.stats)
-	@test "$(shell ls -A results/$* 2>/dev/null)" || rm -rf results/$*
+%.regression: $(foreach b,symmetric target,$(WORK_SPACE)/work/%/$(b)/ocean.stats)
+	@test "$(shell ls -A $(WORK_SPACE)/results/$* 2>/dev/null)" || rm -rf $(WORK_SPACE)/results/$*
 	@cmp $^ || !( \
-	  mkdir -p results/$*; \
-	  (diff $^ | tee results/$*/ocean.stats.regression.diff | head -n 20) ; \
+	  mkdir -p $(WORK_SPACE)/results/$*; \
+	  (diff $^ | tee $(WORK_SPACE)/results/$*/ocean.stats.regression.diff | head -n 20) ; \
 	  echo -e "$(FAIL): Solutions $*.regression have changed." \
 	)
 	@echo -e "$(PASS): Solutions $*.regression agree."
 
 # Regression testing only checks for changes in existing diagnostics
-%.regression.diag: $(foreach b,symmetric target,work/%/$(b)/chksum_diag)
+%.regression.diag: $(foreach b,symmetric target,$(WORK_SPACE)/work/%/$(b)/chksum_diag)
 	@! diff $^ | grep "^[<>]" | grep "^>" > /dev/null \
 	  || ! (\
-	    mkdir -p results/$*; \
-	    (diff $^ | tee results/$*/chksum_diag.regression.diff | head -n 20) ; \
+	    mkdir -p $(WORK_SPACE)/results/$*; \
+	    (diff $^ | tee $(WORK_SPACE)/results/$*/chksum_diag.regression.diff | head -n 20) ; \
 	    echo -e "$(FAIL): Diagnostics $*.regression.diag have changed." \
 	  )
 	@cmp $^ || ( \
@@ -536,7 +542,7 @@ tc4/configure: tc4/configure.ac
 #---
 # Test run output files
 
-# Rule to build work/<tc>/{ocean.stats,chksum_diag}.<tag>
+# Rule to build $(WORK_SPACE)/work/<tc>/{ocean.stats,chksum_diag}.<tag>
 # $(1): Test configuration name <tag>
 # $(2): Executable type
 # $(3): Enable coverage flag
@@ -545,15 +551,15 @@ tc4/configure: tc4/configure.ac
 # $(6): Number of MPI ranks
 
 define STAT_RULE
-work/%/$(1)/ocean.stats work/%/$(1)/chksum_diag: build/$(2)/MOM6 | preproc
+$(WORK_SPACE)/work/%/$(1)/ocean.stats $(WORK_SPACE)/work/%/$(1)/chksum_diag: build/$(2)/MOM6 | preproc
 	@echo "Running test $$*.$(1)..."
 	mkdir -p $$(@D)
 	cp -RL $$*/* $$(@D)
 	mkdir -p $$(@D)/RESTART
 	echo -e "$(4)" > $$(@D)/MOM_override
-	rm -f results/$$*/std.$(1).{out,err}
+	rm -f $(WORK_SPACE)/results/$$*/std.$(1).{out,err}
 	cd $$(@D) \
-	  && $(TIME) $(5) $(MPIRUN) -n $(6) ../../../$$< 2> std.err > std.out \
+	  && $(TIME) $(5) $(MPIRUN) -n $(6) $(abspath $$<) 2> std.err > std.out \
 	  || !( \
 	    mkdir -p ../../../results/$$*/ ; \
 	    cat std.out | tee ../../../results/$$*/std.$(1).out | tail -n 20 ; \
@@ -563,7 +569,7 @@ work/%/$(1)/ocean.stats work/%/$(1)/chksum_diag: build/$(2)/MOM6 | preproc
 	  )
 	@echo -e "$(DONE): $$*.$(1); no runtime errors."
 	if [ $(3) ]; then \
-	  mkdir -p results/$$* ; \
+	  mkdir -p $(WORK_SPACE)/results/$$* ; \
 	  cd build/$(2) ; \
 	  gcov -b *.gcda > gcov.$$*.$(1).out ; \
 	  find -name "*.gcov" -exec sed -i -r 's/^( *[0-9]*)\*:/ \1:/g' {} \; ; \
@@ -614,7 +620,7 @@ $(eval $(call STAT_RULE,cov,cov,true,,,1))
 #  2. Convert DAYMAX from TIMEUNIT to seconds
 #  3. Apply seconds to `ocean_solo_nml` inside input.nml.
 # NOTE: Assumes that runtime set by DAYMAX, will fail if set by input.nml
-work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
+$(WORK_SPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 	rm -rf $(@D)
 	mkdir -p $(@D)
 	cp -RL $*/* $(@D)
@@ -628,9 +634,9 @@ work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 	  && halfperiod=$$(awk -v t=$${daymax} -v dt=$${timeunit} 'BEGIN {printf "%.f", 0.5*t*dt}') \
 	  && printf "\n&ocean_solo_nml\n    seconds = $${halfperiod}\n/\n" >> input.nml
 	# Remove any previous archived output
-	rm -f results/$*/std.restart{1,2}.{out,err}
+	rm -f $(WORK_SPACE)/results/$*/std.restart{1,2}.{out,err}
 	# Run the first half-period
-	cd $(@D) && $(TIME) $(MPIRUN) -n 1 ../../../$< 2> std1.err > std1.out \
+	cd $(@D) && $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std1.err > std1.out \
 	  || !( \
 	    cat std1.out | tee ../../../results/$*/std.restart1.out | tail -n 20 ; \
 	    cat std1.err | tee ../../../results/$*/std.restart1.err | tail -n 20 ; \
@@ -641,7 +647,7 @@ work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 	mkdir $(@D)/RESTART
 	cd $(@D) && sed -i -e "s/input_filename *= *'n'/input_filename = 'r'/g" input.nml
 	# Run the second half-period
-	cd $(@D) && $(TIME) $(MPIRUN) -n 1 ../../../$< 2> std2.err > std2.out \
+	cd $(@D) && $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std2.err > std2.out \
 	  || !( \
 	    cat std2.out | tee ../../../results/$*/std.restart2.out | tail -n 20 ; \
 	    cat std2.err | tee ../../../results/$*/std.restart2.err | tail -n 20 ; \
@@ -654,20 +660,20 @@ work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 # Not a true rule; only call this after `make test` to summarize test results.
 .PHONY: test.summary
 test.summary:
-	@if ls results/*/* &> /dev/null; then \
-	  if ls results/*/std.*.err &> /dev/null; then \
+	@if ls $(WORK_SPACE)/results/*/* &> /dev/null; then \
+	  if ls $(WORK_SPACE)/results/*/std.*.err &> /dev/null; then \
 	    echo "The following tests failed to complete:" ; \
-	    ls results/*/std.*.out \
+	    ls $(WORK_SPACE)/results/*/std.*.out \
 	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[2]; if(length(t)>3) v=v"."t[3]; print a[2],":",v}'; \
 	  fi; \
-	  if ls results/*/ocean.stats.*.diff &> /dev/null; then \
+	  if ls $(WORK_SPACE)/results/*/ocean.stats.*.diff &> /dev/null; then \
 	    echo "The following tests report solution regressions:" ; \
-	    ls results/*/ocean.stats.*.diff \
+	    ls $(WORK_SPACE)/results/*/ocean.stats.*.diff \
 	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[3]; if(length(t)>4) v=v"."t[4]; print a[2],":",v}'; \
 	  fi; \
-	  if ls results/*/chksum_diag.*.diff &> /dev/null; then \
+	  if ls $(WORK_SPACE)/results/*/chksum_diag.*.diff &> /dev/null; then \
 	    echo "The following tests report diagnostic regressions:" ; \
-	    ls results/*/chksum_diag.*.diff \
+	    ls $(WORK_SPACE)/results/*/chksum_diag.*.diff \
 	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[2]; if(length(t)>3) v=v"."t[3]; print a[2],":",v}'; \
 	  fi; \
 	  false ; \
@@ -683,28 +689,28 @@ test.summary:
 .PHONY: run.cov.unit
 run.cov.unit: build/unit/MOM_file_parser_tests.F90.gcov
 
-work/unit/std.out: build/unit/MOM_unit_tests
+$(WORK_SPACE)/work/unit/std.out: build/unit/MOM_unit_tests
 	if [ $(REPORT_COVERAGE) ]; then \
-	  find build/unit -name *.gcda -exec rm -f '{}' \; ; \
+	  find build/unit -name '*.gcda' -exec rm -f '{}' \; ; \
 	fi
 	rm -rf $(@D)
 	mkdir -p $(@D)
 	cd $(@D) \
-	  && $(TIME) $(MPIRUN) -n 1 ../../$< 2> std.err > std.out \
+	  && $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std.err > std.out \
 	  || !( \
 	    cat std.out | tail -n 100 ; \
 	    cat std.err | tail -n 100 ; \
 	  )
 	cd $(@D) \
-	  && $(TIME) $(MPIRUN) -n 2 ../../$< 2> p2.std.err > p2.std.out \
+	  && $(TIME) $(MPIRUN) -n 2 $(abspath $<) 2> p2.std.err > p2.std.out \
 	  || !( \
 	    cat p2.std.out | tail -n 100 ; \
 	    cat p2.std.err | tail -n 100 ; \
 	  )
 
 # NOTE: .gcov actually depends on .gcda, but .gcda is produced with std.out
-# TODO: Replace work/unit/std.out with *.gcda?
-build/unit/MOM_file_parser_tests.F90.gcov: work/unit/std.out
+# TODO: Replace $(WORK_SPACE)/work/unit/std.out with *.gcda?
+build/unit/MOM_file_parser_tests.F90.gcov: $(WORK_SPACE)/work/unit/std.out
 	cd $(@D) \
 	  && gcov -b *.gcda > gcov.unit.out
 	find $(@D) -name "*.gcov" -exec sed -i -r 's/^( *[0-9]*)\*:/ \1:/g' {} \;
@@ -731,22 +737,22 @@ PCONFIGS = p0
 profile: $(foreach p,$(PCONFIGS), prof.$(p))
 
 .PHONY: prof.p0
-prof.p0: work/p0/opt/clocks.json work/p0/opt_target/clocks.json
+prof.p0: $(WORK_SPACE)/work/p0/opt/clocks.json $(WORK_SPACE)/work/p0/opt_target/clocks.json
 	python tools/compare_clocks.py $^
 
-work/p0/%/clocks.json: work/p0/%/std.out
+$(WORK_SPACE)/work/p0/%/clocks.json: $(WORK_SPACE)/work/p0/%/std.out
 	python tools/parse_fms_clocks.py -d $(@D) $^ > $@
 
-work/p0/opt/std.out: build/opt/MOM6
-work/p0/opt_target/std.out: build/opt_target/MOM6
+$(WORK_SPACE)/work/p0/opt/std.out: build/opt/MOM6
+$(WORK_SPACE)/work/p0/opt_target/std.out: build/opt_target/MOM6
 
-work/p0/%/std.out:
+$(WORK_SPACE)/work/p0/%/std.out:
 	mkdir -p $(@D)
 	cp -RL p0/* $(@D)
 	mkdir -p $(@D)/RESTART
 	echo -e "" > $(@D)/MOM_override
 	cd $(@D) \
-	  && $(MPIRUN) -n 1 ../../../$< 2> std.err > std.out
+	  && $(MPIRUN) -n 1 $(abspath $<) 2> std.err > std.out
 
 
 #---
@@ -759,16 +765,16 @@ PERF_EVENTS ?=
 perf: $(foreach p,$(PCONFIGS), perf.$(p))
 
 .PHONY: prof.p0
-perf.p0: work/p0/opt/profile.json work/p0/opt_target/profile.json
+perf.p0: $(WORK_SPACE)/work/p0/opt/profile.json $(WORK_SPACE)/work/p0/opt_target/profile.json
 	python tools/compare_perf.py $^
 
-work/p0/%/profile.json: work/p0/%/perf.data
+$(WORK_SPACE)/work/p0/%/profile.json: $(WORK_SPACE)/work/p0/%/perf.data
 	python tools/parse_perf.py -f $< > $@
 
-work/p0/opt/perf.data: build/opt/MOM6
-work/p0/opt_target/perf.data: build/opt_target/MOM6
+$(WORK_SPACE)/work/p0/opt/perf.data: build/opt/MOM6
+$(WORK_SPACE)/work/p0/opt_target/perf.data: build/opt_target/MOM6
 
-work/p0/%/perf.data:
+$(WORK_SPACE)/work/p0/%/perf.data:
 	mkdir -p $(@D)
 	cp -RL p0/* $(@D)
 	mkdir -p $(@D)/RESTART
@@ -799,7 +805,7 @@ clean.build:
 .PHONY: clean.stats
 clean.stats:
 	@[ $$(basename $$(pwd)) = .testing ]
-	rm -rf work results
+	rm -rf $(if $(strip $(WORK_SPACE)),,$(error WORK_SPACE is empty; refusing to remove /work and /results))$(WORK_SPACE)/work $(WORK_SPACE)/results
 
 
 .PHONY: clean.preproc