From de5b98897373ed9fe0d95f87bc41e2f32f099c5f Mon Sep 17 00:00:00 2001 From: James Foucar Date: Mon, 24 Apr 2017 16:56:59 -0600 Subject: [PATCH 1/5] PET must halve TASKS when doubling THRDS This is the only way both the single and 2-threads cases can run on titan under the same batch submission. Titan does not let you overload nodes. Also, ACME is making heavy use of PET, so it needs to be tested in cime_developer. --- scripts/lib/CIME/SystemTests/pet.py | 6 ++++++ scripts/lib/update_acme_tests.py | 1 + 2 files changed, 7 insertions(+) diff --git a/scripts/lib/CIME/SystemTests/pet.py b/scripts/lib/CIME/SystemTests/pet.py index d6a6718464b..636f42b3e64 100644 --- a/scripts/lib/CIME/SystemTests/pet.py +++ b/scripts/lib/CIME/SystemTests/pet.py @@ -30,6 +30,12 @@ def _case_one_setup(self): if self._case.get_value("NTHRDS_%s"%comp) <= 1: self._case.set_value("NTHRDS_%s"%comp, 2) + # Need to halve NTASKS since we have double the threads + ntasks = self._case.get_value("NTASKS_%s" % comp) + if ntasks > 1: + ntasks /= 2 + self._case.set_value("NTASKS_%s" % comp, ntasks) + # Need to redo case_setup because we may have changed the number of threads case_setup(self._case, reset=True) diff --git a/scripts/lib/update_acme_tests.py b/scripts/lib/update_acme_tests.py index e7592980bee..7b407ca88d0 100644 --- a/scripts/lib/update_acme_tests.py +++ b/scripts/lib/update_acme_tests.py @@ -48,6 +48,7 @@ "ERP.f45_g37_rx1.A", "SMS_D_Ln9.f19_g16_rx1.A", "DAE.f19_f19.A", + "PET.f19_f19.A", "SMS.T42_T42.S", "PRE.f45_g37_rx1.ADESP") ), From 0ed54b217bbae3b384a952c355b0a34c607719cb Mon Sep 17 00:00:00 2001 From: James Foucar Date: Tue, 25 Apr 2017 12:49:13 -0600 Subject: [PATCH 2/5] Double nodes instead of halving tasks --- scripts/lib/CIME/SystemTests/pet.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/lib/CIME/SystemTests/pet.py b/scripts/lib/CIME/SystemTests/pet.py index 636f42b3e64..3557420189e 100644 --- a/scripts/lib/CIME/SystemTests/pet.py +++ b/scripts/lib/CIME/SystemTests/pet.py @@ -30,11 +30,9 @@ def _case_one_setup(self): if self._case.get_value("NTHRDS_%s"%comp) <= 1: self._case.set_value("NTHRDS_%s"%comp, 2) - # Need to halve NTASKS since we have double the threads - ntasks = self._case.get_value("NTASKS_%s" % comp) - if ntasks > 1: - ntasks /= 2 - self._case.set_value("NTASKS_%s" % comp, ntasks) + # Subtle: machines like titan will not let you overload the number of threads + # on a node. + self._case.num_nodes *= 2 # Need to redo case_setup because we may have changed the number of threads case_setup(self._case, reset=True) From 975958dcaeff22680dd96a279986c860d7ace8cc Mon Sep 17 00:00:00 2001 From: James Foucar Date: Tue, 25 Apr 2017 15:30:26 -0600 Subject: [PATCH 3/5] Re-initialize key case values upon case.setup --- scripts/lib/CIME/SystemTests/pet.py | 4 ---- scripts/lib/CIME/case.py | 6 +++--- scripts/lib/CIME/case_setup.py | 8 +++++--- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/scripts/lib/CIME/SystemTests/pet.py b/scripts/lib/CIME/SystemTests/pet.py index 3557420189e..d6a6718464b 100644 --- a/scripts/lib/CIME/SystemTests/pet.py +++ b/scripts/lib/CIME/SystemTests/pet.py @@ -30,10 +30,6 @@ def _case_one_setup(self): if self._case.get_value("NTHRDS_%s"%comp) <= 1: self._case.set_value("NTHRDS_%s"%comp, 2) - # Subtle: machines like titan will not let you overload the number of threads - # on a node. - self._case.num_nodes *= 2 - # Need to redo case_setup because we may have changed the number of threads case_setup(self._case, reset=True) diff --git a/scripts/lib/CIME/case.py b/scripts/lib/CIME/case.py index d10088fe396..8d7308054aa 100644 --- a/scripts/lib/CIME/case.py +++ b/scripts/lib/CIME/case.py @@ -107,7 +107,7 @@ def __init__(self, case_root=None, read_only=True): self.cores_per_task = None # check if case has been configured and if so initialize derived if self.get_value("CASEROOT") is not None: - self._initialize_derived_attributes() + self.initialize_derived_attributes() def check_if_comp_var(self, vid): vid = vid @@ -119,7 +119,7 @@ def check_if_comp_var(self, vid): return vid, comp, iscompvar return vid, comp, iscompvar - def _initialize_derived_attributes(self): + def initialize_derived_attributes(self): """ These are derived variables which can be used in the config_* files for variable substitution using the {{ var }} syntax @@ -809,7 +809,7 @@ def configure(self, compset_name, grid_name, machine_name=None, if test: self.set_value("TEST",True) - self._initialize_derived_attributes() + self.initialize_derived_attributes() # Make sure that parallel IO is not specified if total_tasks==1 if self.total_tasks == 1: diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index 087c5999b38..bbd743501a1 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -136,9 +136,6 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False, else: expect(False, "NINST_%s value %d greater than NTASKS_%s %d" % (comp, ninst, comp, ntasks)) - # Set TOTAL_CORES - case.set_value("TOTAL_CORES", case.total_tasks * case.cores_per_task ) - if os.path.exists("case.run"): logger.info("Machine/Decomp/Pes configuration has already been done ...skipping") else: @@ -187,6 +184,11 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False, logger.debug("at copy TOTALPES = %s"%case.get_value("TOTALPES")) lock_file("env_mach_pes.xml") + # Set TOTAL_CORES + case.set_value("TOTAL_CORES", case.total_tasks * case.cores_per_task ) + + case.initialize_derived_attributes() + # Create user_nl files for the required number of instances if not os.path.exists("user_nl_cpl"): logger.info("Creating user_nl_xxx files for components and cpl") From 4cb307589f7bb6180d3cf26f9c2a02055e2dbff6 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Tue, 25 Apr 2017 16:00:31 -0600 Subject: [PATCH 4/5] Order of operations was not correct --- scripts/lib/CIME/case_setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index bbd743501a1..9baa2d215a7 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -138,6 +138,8 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False, if os.path.exists("case.run"): logger.info("Machine/Decomp/Pes configuration has already been done ...skipping") + + case.initialize_derived_attributes() else: check_pelayouts_require_rebuild(case, models) @@ -156,6 +158,8 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False, cost_pes = env_mach_pes.get_cost_pes(pestot, thread_count, machine=case.get_value("MACH")) case.set_value("COST_PES", cost_pes) + case.initialize_derived_attributes() + # create batch files logger.info("Creating batch script case.run") env_batch = case.get_env("batch") @@ -187,8 +191,6 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False, # Set TOTAL_CORES case.set_value("TOTAL_CORES", case.total_tasks * case.cores_per_task ) - case.initialize_derived_attributes() - # Create user_nl files for the required number of instances if not os.path.exists("user_nl_cpl"): logger.info("Creating user_nl_xxx files for components and cpl") From 56aba434897ac6df2c8b255ac45a1976359c6d2a Mon Sep 17 00:00:00 2001 From: James Foucar Date: Tue, 25 Apr 2017 16:03:41 -0600 Subject: [PATCH 5/5] PIO settings need to happen before init_derived_attributes --- scripts/lib/CIME/case_setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/lib/CIME/case_setup.py b/scripts/lib/CIME/case_setup.py index 9baa2d215a7..5afc3979bf0 100644 --- a/scripts/lib/CIME/case_setup.py +++ b/scripts/lib/CIME/case_setup.py @@ -158,6 +158,10 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False, cost_pes = env_mach_pes.get_cost_pes(pestot, thread_count, machine=case.get_value("MACH")) case.set_value("COST_PES", cost_pes) + # Make sure pio settings are consistent + if adjust_pio: + adjust_pio_layout(case, tasks_per_node) + case.initialize_derived_attributes() # create batch files @@ -177,10 +181,6 @@ def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False, logger.info("Writing %s script from input template %s" % (job, input_batch_script)) env_batch.make_batch_script(input_batch_script, job, case, pestot, tasks_per_node, num_nodes, thread_count) - # Make sure pio settings are consistant - if adjust_pio: - adjust_pio_layout(case, tasks_per_node) - # Make a copy of env_mach_pes.xml in order to be able # to check that it does not change once case.setup is invoked logger.info("Locking file env_mach_pes.xml")