From 297b91213f5ede0f4134437f94ef2c5f26fb1245 Mon Sep 17 00:00:00 2001 From: "Keaton J. Burns" Date: Tue, 9 Jan 2024 14:59:19 -0500 Subject: [PATCH 1/5] Add built-in cprofile tooling --- dedalus/core/solvers.py | 44 +++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/dedalus/core/solvers.py b/dedalus/core/solvers.py index 135b1933..0ff2964a 100644 --- a/dedalus/core/solvers.py +++ b/dedalus/core/solvers.py @@ -6,6 +6,8 @@ import h5py import pathlib import scipy.linalg +import cProfile +import pstats from math import prod from . import subsystems @@ -14,6 +16,7 @@ from ..libraries.matsolvers import matsolvers from ..tools.config import config from ..tools.array import scipy_sparse_eigs +from ..tools.parallel import Sync import logging logger = logging.getLogger(__name__.split('.')[-1]) @@ -485,6 +488,8 @@ class InitialValueSolver(SolverBase): Iteration cadence for enforcing Hermitian symmetry on real variables (default: 100). warmup_iterations : int, optional Number of warmup iterations to disregard when computing runtime statistics (default: 10). + profile : bool, optional + Save profiles with cProfile (default: False). **kw : Other options passed to ProblemBase. @@ -510,15 +515,20 @@ class InitialValueSolver(SolverBase): matsolver_default = 'MATRIX_FACTORIZER' matrices = ['M', 'L'] - def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterations=10, **kw): + def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterations=10, profile=False, **kw): logger.debug('Beginning IVP instantiation') - super().__init__(problem, **kw) - if np.isrealobj(self.dtype.type()): - self.enforce_real_cadence = enforce_real_cadence - else: - self.enforce_real_cadence = None + # Setup timing and profiling + self.dist = problem.dist self._bcast_array = np.zeros(1, dtype=float) self.init_time = self.world_time + self.profile = profile + if profile: + self.setup_profiler = cProfile.Profile() + self.warmup_profiler = cProfile.Profile() + self.run_profiler = cProfile.Profile() + self.setup_profiler.enable() + # Build subsystems and subproblems + super().__init__(problem, **kw) # Build LHS matrices self.build_matrices(self.subproblems, ['M', 'L']) # Compute total modes @@ -538,6 +548,10 @@ def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterat self.sim_time = self.initial_sim_time = problem.time.allreduce_data_max(layout='g') self.iteration = self.initial_iteration = 0 self.warmup_iterations = warmup_iterations + if np.isrealobj(self.dtype.type()): + self.enforce_real_cadence = enforce_real_cadence + else: + self.enforce_real_cadence = None # Default integration parameters self.stop_sim_time = np.inf self.stop_wall_time = np.inf @@ -648,8 +662,14 @@ def step(self, dt): wall_time = self.wall_time if self.iteration == self.initial_iteration: self.start_time = wall_time + if self.profile: + self.dump_profiles(self.setup_profiler, "setup") + self.warmup_profiler.enable() if self.iteration == self.initial_iteration + self.warmup_iterations: self.warmup_time = wall_time + if self.profile: + self.dump_profiles(self.warmup_profiler, "warmup") + self.run_profiler.enable() # Advance using timestepper self.timestepper.step(dt, wall_time) # Update iteration @@ -704,6 +724,8 @@ def log_stats(self, format=".4g"): logger.info(f"Final iteration: {self.iteration}") logger.info(f"Final sim time: {self.sim_time}") logger.info(f"Setup time (init - iter 0): {self.start_time:{format}} sec") + if self.profile: + self.dump_profiles(self.run_profiler, "runtime") if self.iteration >= self.initial_iteration + self.warmup_iterations: warmup_time = self.warmup_time - self.start_time run_time = log_time - self.warmup_time @@ -716,3 +738,13 @@ def log_stats(self, format=".4g"): logger.info(f"Speed: {(modes*stages/cpus/run_time):{format}} mode-stages/cpu-sec") else: logger.info(f"Timings unavailable because warmup did not complete.") + + def dump_profiles(self, profiler, name): + comm = self.dist.comm + # Write stats from each process + profiler.dump_stats(f"/tmp/{name}_proc{comm.rank}.prof") + # Sum stats over processes + with Sync(comm): + if comm.rank == 0: + joint_stats = pstats.Stats(*(f"/tmp/{name}_proc{i}.prof" for i in range(comm.size))) + joint_stats.dump_stats(f"{name}.prof") From 54a3f6a9c063eb158c5515e93687f69a7cd14aaa Mon Sep 17 00:00:00 2001 From: "Keaton J. Burns" Date: Wed, 10 Jan 2024 13:57:48 -0500 Subject: [PATCH 2/5] Use pickling instead of /tmp to gather profiles from processes --- dedalus/core/solvers.py | 18 ++++++++++-------- dedalus/tools/parallel.py | 11 +++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/dedalus/core/solvers.py b/dedalus/core/solvers.py index 0ff2964a..976c71f1 100644 --- a/dedalus/core/solvers.py +++ b/dedalus/core/solvers.py @@ -16,7 +16,7 @@ from ..libraries.matsolvers import matsolvers from ..tools.config import config from ..tools.array import scipy_sparse_eigs -from ..tools.parallel import Sync +from ..tools.parallel import ProfileWrapper import logging logger = logging.getLogger(__name__.split('.')[-1]) @@ -741,10 +741,12 @@ def log_stats(self, format=".4g"): def dump_profiles(self, profiler, name): comm = self.dist.comm - # Write stats from each process - profiler.dump_stats(f"/tmp/{name}_proc{comm.rank}.prof") - # Sum stats over processes - with Sync(comm): - if comm.rank == 0: - joint_stats = pstats.Stats(*(f"/tmp/{name}_proc{i}.prof" for i in range(comm.size))) - joint_stats.dump_stats(f"{name}.prof") + # Disable and create stats on each process + profiler.create_stats() + # Gather using wrapper class to avoid pickling issues + profiles = comm.gather(ProfileWrapper(profiler.stats), root=0) + # Sum stats on root process + if comm.rank == 0: + joint_stats = pstats.Stats(*profiles) + joint_stats.dump_stats(f"{name}.prof") + diff --git a/dedalus/tools/parallel.py b/dedalus/tools/parallel.py index 0c03191a..be21c849 100644 --- a/dedalus/tools/parallel.py +++ b/dedalus/tools/parallel.py @@ -56,3 +56,14 @@ def __enter__(self): def __exit__(self, type, value, traceback): for i in range(self.size-self.rank): self.comm.Barrier() + + +class ProfileWrapper: + """Pickleable wrapper for cProfile.Profile for use with pstats.Stats""" + + def __init__(self, stats): + self.stats = stats + + def create_stats(self): + pass + From dbc204132927f5ecd1727cc445bbcb714d185165 Mon Sep 17 00:00:00 2001 From: Benjamin Brown Date: Fri, 12 Jan 2024 20:38:12 -0800 Subject: [PATCH 3/5] Adds per-core outputs of profiling data, to determine imbalances and bottlenecks. Currently saving via shelve. --- dedalus/core/solvers.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/dedalus/core/solvers.py b/dedalus/core/solvers.py index 976c71f1..b5f030f8 100644 --- a/dedalus/core/solvers.py +++ b/dedalus/core/solvers.py @@ -10,6 +10,9 @@ import pstats from math import prod +from collections import defaultdict +import shelve + from . import subsystems from . import timesteppers from .evaluator import Evaluator @@ -743,10 +746,25 @@ def dump_profiles(self, profiler, name): comm = self.dist.comm # Disable and create stats on each process profiler.create_stats() + p = pstats.Stats(profiler) + p.strip_dirs() # Gather using wrapper class to avoid pickling issues - profiles = comm.gather(ProfileWrapper(profiler.stats), root=0) + profiles = comm.gather(ProfileWrapper(p.stats), root=0) # Sum stats on root process if comm.rank == 0: + if self.profile=='verbose': + profile_database = pathlib.Path(f"{name}_profiles") + stats = {'primcalls':defaultdict(list),'totcalls':defaultdict(list),'tottime':defaultdict(list),'cumtime':defaultdict(list)} + for profile in profiles: + for func, (primcalls, totcalls, tottime, cumtime, callers) in profile.stats.items(): + stats['primcalls'][func].append(primcalls) + stats['totcalls'][func].append(totcalls) + stats['tottime'][func].append(tottime) + stats['cumtime'][func].append(cumtime) + with shelve.open(str(profile_database), flag='n') as shelf: + for key in stats: + shelf[key] = stats[key] + + # creation of joint_stats destroys profiles, so do this second joint_stats = pstats.Stats(*profiles) joint_stats.dump_stats(f"{name}.prof") - From a5671ffb6289128e50f4b492009bdf490c79e3d3 Mon Sep 17 00:00:00 2001 From: Benjamin Brown Date: Sat, 13 Jan 2024 20:24:18 -0800 Subject: [PATCH 4/5] Adds config-file driving for profiling, in new 'profiling' section. --- dedalus/core/solvers.py | 11 +++++++++-- dedalus/dedalus.cfg | 8 ++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/dedalus/core/solvers.py b/dedalus/core/solvers.py index b5f030f8..9f7e126d 100644 --- a/dedalus/core/solvers.py +++ b/dedalus/core/solvers.py @@ -21,6 +21,9 @@ from ..tools.array import scipy_sparse_eigs from ..tools.parallel import ProfileWrapper +PROFILE_SCRIPT = config['profiling'].getboolean('PROFILE_SCRIPT') +PROFILE_MODE_DEFAULT = config['profiling'].get('PROFILE_MODE_DEFAULT') + import logging logger = logging.getLogger(__name__.split('.')[-1]) @@ -493,6 +496,9 @@ class InitialValueSolver(SolverBase): Number of warmup iterations to disregard when computing runtime statistics (default: 10). profile : bool, optional Save profiles with cProfile (default: False). + profile_mode : string, optional + Output joined files [summary] or also include per-core analysis [full] (default: full). + **kw : Other options passed to ProblemBase. @@ -518,7 +524,7 @@ class InitialValueSolver(SolverBase): matsolver_default = 'MATRIX_FACTORIZER' matrices = ['M', 'L'] - def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterations=10, profile=False, **kw): + def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterations=10, profile=PROFILE_SCRIPT, profile_mode=PROFILE_MODE_DEFAULT, **kw): logger.debug('Beginning IVP instantiation') # Setup timing and profiling self.dist = problem.dist @@ -526,6 +532,7 @@ def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterat self.init_time = self.world_time self.profile = profile if profile: + self.profile_mode = profile_mode.lower() self.setup_profiler = cProfile.Profile() self.warmup_profiler = cProfile.Profile() self.run_profiler = cProfile.Profile() @@ -752,7 +759,7 @@ def dump_profiles(self, profiler, name): profiles = comm.gather(ProfileWrapper(p.stats), root=0) # Sum stats on root process if comm.rank == 0: - if self.profile=='verbose': + if self.profile_mode=='full': profile_database = pathlib.Path(f"{name}_profiles") stats = {'primcalls':defaultdict(list),'totcalls':defaultdict(list),'tottime':defaultdict(list),'cumtime':defaultdict(list)} for profile in profiles: diff --git a/dedalus/dedalus.cfg b/dedalus/dedalus.cfg index 416e621d..ef4d5baa 100644 --- a/dedalus/dedalus.cfg +++ b/dedalus/dedalus.cfg @@ -118,3 +118,11 @@ # This works around NFS caching issues FILEHANDLER_TOUCH_TMPFILE = False + +[profiling] + + # Profile script using cProfile + PROFILE_SCRIPT = False + + # Level of profiling to conduct (summary, full) + PROFILE_MODE_DEFAULT = full From 09a3bc0b90a89355adf97d1a3be29ba4d0d8b4d3 Mon Sep 17 00:00:00 2001 From: "Keaton J. Burns" Date: Wed, 31 Jan 2024 13:13:34 -0500 Subject: [PATCH 5/5] Update profile options and put in subdirectory by default --- dedalus/core/solvers.py | 44 +++++++++++++++++++-------------------- dedalus/dedalus.cfg | 14 ++++++++----- dedalus/tools/parallel.py | 10 +++++++++ 3 files changed, 41 insertions(+), 27 deletions(-) diff --git a/dedalus/core/solvers.py b/dedalus/core/solvers.py index 9f7e126d..81a4318b 100644 --- a/dedalus/core/solvers.py +++ b/dedalus/core/solvers.py @@ -9,9 +9,8 @@ import cProfile import pstats from math import prod - from collections import defaultdict -import shelve +import pickle from . import subsystems from . import timesteppers @@ -19,10 +18,11 @@ from ..libraries.matsolvers import matsolvers from ..tools.config import config from ..tools.array import scipy_sparse_eigs -from ..tools.parallel import ProfileWrapper +from ..tools.parallel import ProfileWrapper, parallel_mkdir -PROFILE_SCRIPT = config['profiling'].getboolean('PROFILE_SCRIPT') -PROFILE_MODE_DEFAULT = config['profiling'].get('PROFILE_MODE_DEFAULT') +PROFILE_DEFAULT = config['profiling'].getboolean('PROFILE_DEFAULT') +PARALLEL_PROFILE_DEFAULT = config['profiling'].getboolean('PARALLEL_PROFILE_DEFAULT') +PROFILE_DIRECTORY = pathlib.Path(config['profiling'].get('PROFILE_DIRECTORY')) import logging logger = logging.getLogger(__name__.split('.')[-1]) @@ -495,10 +495,9 @@ class InitialValueSolver(SolverBase): warmup_iterations : int, optional Number of warmup iterations to disregard when computing runtime statistics (default: 10). profile : bool, optional - Save profiles with cProfile (default: False). - profile_mode : string, optional - Output joined files [summary] or also include per-core analysis [full] (default: full). - + Save accumulated profiles with cProfile (default: False). + parallel_profile : bool, optional + Save per-process and accumulated profiles with cProfile (default: False). **kw : Other options passed to ProblemBase. @@ -524,15 +523,16 @@ class InitialValueSolver(SolverBase): matsolver_default = 'MATRIX_FACTORIZER' matrices = ['M', 'L'] - def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterations=10, profile=PROFILE_SCRIPT, profile_mode=PROFILE_MODE_DEFAULT, **kw): + def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterations=10, profile=PROFILE_DEFAULT, parallel_profile=PARALLEL_PROFILE_DEFAULT, **kw): logger.debug('Beginning IVP instantiation') # Setup timing and profiling self.dist = problem.dist self._bcast_array = np.zeros(1, dtype=float) self.init_time = self.world_time - self.profile = profile - if profile: - self.profile_mode = profile_mode.lower() + if profile or parallel_profile: + parallel_mkdir(PROFILE_DIRECTORY, comm=self.dist.comm) + self.profile = True + self.parallel_profile = parallel_profile self.setup_profiler = cProfile.Profile() self.warmup_profiler = cProfile.Profile() self.run_profiler = cProfile.Profile() @@ -750,6 +750,7 @@ def log_stats(self, format=".4g"): logger.info(f"Timings unavailable because warmup did not complete.") def dump_profiles(self, profiler, name): + "Save profiling data to disk." comm = self.dist.comm # Disable and create stats on each process profiler.create_stats() @@ -759,19 +760,18 @@ def dump_profiles(self, profiler, name): profiles = comm.gather(ProfileWrapper(p.stats), root=0) # Sum stats on root process if comm.rank == 0: - if self.profile_mode=='full': - profile_database = pathlib.Path(f"{name}_profiles") - stats = {'primcalls':defaultdict(list),'totcalls':defaultdict(list),'tottime':defaultdict(list),'cumtime':defaultdict(list)} + if self.parallel_profile: + stats = {'primcalls': defaultdict(list), + 'totcalls': defaultdict(list), + 'tottime': defaultdict(list), + 'cumtime': defaultdict(list)} for profile in profiles: for func, (primcalls, totcalls, tottime, cumtime, callers) in profile.stats.items(): stats['primcalls'][func].append(primcalls) stats['totcalls'][func].append(totcalls) stats['tottime'][func].append(tottime) stats['cumtime'][func].append(cumtime) - with shelve.open(str(profile_database), flag='n') as shelf: - for key in stats: - shelf[key] = stats[key] - - # creation of joint_stats destroys profiles, so do this second + pickle.dump(stats, open(PROFILE_DIRECTORY / f"{name}_parallel.pickle", 'wb')) + # Creation of joint_stats destroys profiles, so do this second joint_stats = pstats.Stats(*profiles) - joint_stats.dump_stats(f"{name}.prof") + joint_stats.dump_stats(PROFILE_DIRECTORY / f"{name}.prof") diff --git a/dedalus/dedalus.cfg b/dedalus/dedalus.cfg index ef4d5baa..edd0595c 100644 --- a/dedalus/dedalus.cfg +++ b/dedalus/dedalus.cfg @@ -118,11 +118,15 @@ # This works around NFS caching issues FILEHANDLER_TOUCH_TMPFILE = False - [profiling] - # Profile script using cProfile - PROFILE_SCRIPT = False + # Default profile setting for solvers + # This saves accumulated profiling data using cProfile + PROFILE_DEFAULT = False + + # Default parallel profile setting for solvers + # This saves per-process and accumulated profiling data using cProfile + PARALLEL_PROFILE_DEFAULT = False - # Level of profiling to conduct (summary, full) - PROFILE_MODE_DEFAULT = full + # Profile directory base (will be expanded to /runtime.prof, etc) + PROFILE_DIRECTORY = profiles diff --git a/dedalus/tools/parallel.py b/dedalus/tools/parallel.py index be21c849..b3d64940 100644 --- a/dedalus/tools/parallel.py +++ b/dedalus/tools/parallel.py @@ -3,6 +3,7 @@ """ +import pathlib from mpi4py import MPI @@ -67,3 +68,12 @@ def __init__(self, stats): def create_stats(self): pass + +def parallel_mkdir(path, comm=MPI.COMM_WORLD): + """Create a directory from root process.""" + path = pathlib.Path(path) + with Sync(comm=comm, enter=False, exit=True) as sync: + if sync.comm.rank == 0: + if not path.exists(): + path.mkdir() +