Skip to content

Commit

Permalink
Merge pull request #278 from DedalusProject/profiling
Browse files Browse the repository at this point in the history
Add built-in cprofile tooling
  • Loading branch information
kburns authored Jan 31, 2024
2 parents b008633 + 09a3bc0 commit 9cb1d35
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 6 deletions.
71 changes: 65 additions & 6 deletions dedalus/core/solvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,23 @@
import h5py
import pathlib
import scipy.linalg
import cProfile
import pstats
from math import prod
from collections import defaultdict
import pickle

from . import subsystems
from . import timesteppers
from .evaluator import Evaluator
from ..libraries.matsolvers import matsolvers
from ..tools.config import config
from ..tools.array import scipy_sparse_eigs
from ..tools.parallel import ProfileWrapper, parallel_mkdir

PROFILE_DEFAULT = config['profiling'].getboolean('PROFILE_DEFAULT')
PARALLEL_PROFILE_DEFAULT = config['profiling'].getboolean('PARALLEL_PROFILE_DEFAULT')
PROFILE_DIRECTORY = pathlib.Path(config['profiling'].get('PROFILE_DIRECTORY'))

import logging
logger = logging.getLogger(__name__.split('.')[-1])
Expand Down Expand Up @@ -485,6 +494,10 @@ class InitialValueSolver(SolverBase):
Iteration cadence for enforcing Hermitian symmetry on real variables (default: 100).
warmup_iterations : int, optional
Number of warmup iterations to disregard when computing runtime statistics (default: 10).
profile : bool, optional
Save accumulated profiles with cProfile (default: False).
parallel_profile : bool, optional
Save per-process and accumulated profiles with cProfile (default: False).
**kw :
Other options passed to ProblemBase.
Expand All @@ -510,15 +523,22 @@ class InitialValueSolver(SolverBase):
matsolver_default = 'MATRIX_FACTORIZER'
matrices = ['M', 'L']

def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterations=10, **kw):
def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterations=10, profile=PROFILE_DEFAULT, parallel_profile=PARALLEL_PROFILE_DEFAULT, **kw):
logger.debug('Beginning IVP instantiation')
super().__init__(problem, **kw)
if np.isrealobj(self.dtype.type()):
self.enforce_real_cadence = enforce_real_cadence
else:
self.enforce_real_cadence = None
# Setup timing and profiling
self.dist = problem.dist
self._bcast_array = np.zeros(1, dtype=float)
self.init_time = self.world_time
if profile or parallel_profile:
parallel_mkdir(PROFILE_DIRECTORY, comm=self.dist.comm)
self.profile = True
self.parallel_profile = parallel_profile
self.setup_profiler = cProfile.Profile()
self.warmup_profiler = cProfile.Profile()
self.run_profiler = cProfile.Profile()
self.setup_profiler.enable()
# Build subsystems and subproblems
super().__init__(problem, **kw)
# Build LHS matrices
self.build_matrices(self.subproblems, ['M', 'L'])
# Compute total modes
Expand All @@ -538,6 +558,10 @@ def __init__(self, problem, timestepper, enforce_real_cadence=100, warmup_iterat
self.sim_time = self.initial_sim_time = problem.time.allreduce_data_max(layout='g')
self.iteration = self.initial_iteration = 0
self.warmup_iterations = warmup_iterations
if np.isrealobj(self.dtype.type()):
self.enforce_real_cadence = enforce_real_cadence
else:
self.enforce_real_cadence = None
# Default integration parameters
self.stop_sim_time = np.inf
self.stop_wall_time = np.inf
Expand Down Expand Up @@ -648,8 +672,14 @@ def step(self, dt):
wall_time = self.wall_time
if self.iteration == self.initial_iteration:
self.start_time = wall_time
if self.profile:
self.dump_profiles(self.setup_profiler, "setup")
self.warmup_profiler.enable()
if self.iteration == self.initial_iteration + self.warmup_iterations:
self.warmup_time = wall_time
if self.profile:
self.dump_profiles(self.warmup_profiler, "warmup")
self.run_profiler.enable()
# Advance using timestepper
self.timestepper.step(dt, wall_time)
# Update iteration
Expand Down Expand Up @@ -704,6 +734,8 @@ def log_stats(self, format=".4g"):
logger.info(f"Final iteration: {self.iteration}")
logger.info(f"Final sim time: {self.sim_time}")
logger.info(f"Setup time (init - iter 0): {self.start_time:{format}} sec")
if self.profile:
self.dump_profiles(self.run_profiler, "runtime")
if self.iteration >= self.initial_iteration + self.warmup_iterations:
warmup_time = self.warmup_time - self.start_time
run_time = log_time - self.warmup_time
Expand All @@ -716,3 +748,30 @@ def log_stats(self, format=".4g"):
logger.info(f"Speed: {(modes*stages/cpus/run_time):{format}} mode-stages/cpu-sec")
else:
logger.info(f"Timings unavailable because warmup did not complete.")

def dump_profiles(self, profiler, name):
    """
    Save profiling data to disk.

    Gathers per-process cProfile statistics to the root process, optionally
    saves the per-process stats as a pickle (when parallel profiling is
    enabled), and saves the accumulated stats in pstats format.

    Parameters
    ----------
    profiler : cProfile.Profile
        Profiler whose accumulated statistics will be gathered and saved.
    name : str
        Base name for the output files inside PROFILE_DIRECTORY
        (produces "<name>.prof" and optionally "<name>_parallel.pickle").
    """
    comm = self.dist.comm
    # Disable and create stats on each process
    profiler.create_stats()
    p = pstats.Stats(profiler)
    p.strip_dirs()
    # Gather using wrapper class to avoid pickling issues
    profiles = comm.gather(ProfileWrapper(p.stats), root=0)
    # Sum stats on root process
    if comm.rank == 0:
        if self.parallel_profile:
            # Collect per-process values for each function entry
            stats = {'primcalls': defaultdict(list),
                     'totcalls': defaultdict(list),
                     'tottime': defaultdict(list),
                     'cumtime': defaultdict(list)}
            for profile in profiles:
                for func, (primcalls, totcalls, tottime, cumtime, callers) in profile.stats.items():
                    stats['primcalls'][func].append(primcalls)
                    stats['totcalls'][func].append(totcalls)
                    stats['tottime'][func].append(tottime)
                    stats['cumtime'][func].append(cumtime)
            # Use a context manager so the file handle is closed promptly
            # (original passed an anonymous open() to pickle.dump, leaking the handle)
            with open(PROFILE_DIRECTORY / f"{name}_parallel.pickle", 'wb') as file:
                pickle.dump(stats, file)
        # Creation of joint_stats destroys profiles, so do this second
        joint_stats = pstats.Stats(*profiles)
        joint_stats.dump_stats(PROFILE_DIRECTORY / f"{name}.prof")
12 changes: 12 additions & 0 deletions dedalus/dedalus.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,15 @@
# This works around NFS caching issues
FILEHANDLER_TOUCH_TMPFILE = False

[profiling]

# Default profile setting for solvers
# This saves accumulated profiling data using cProfile
PROFILE_DEFAULT = False

# Default parallel profile setting for solvers
# This saves per-process and accumulated profiling data using cProfile
PARALLEL_PROFILE_DEFAULT = False

# Profile directory base (will be expanded to <PROFILE_DIRECTORY>/runtime.prof, etc)
PROFILE_DIRECTORY = profiles
21 changes: 21 additions & 0 deletions dedalus/tools/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import pathlib
from mpi4py import MPI


Expand Down Expand Up @@ -56,3 +57,23 @@ def __enter__(self):
def __exit__(self, type, value, traceback):
    # Each rank calls (size - rank) barriers on exit; presumably this pairs
    # with complementary barrier calls in __enter__ to stagger ranks through
    # the synchronized region in rank order -- confirm against __enter__
    # (not fully visible here).
    for i in range(self.size-self.rank):
        self.comm.Barrier()


class ProfileWrapper:
    """
    Pickleable stand-in for cProfile.Profile, usable with pstats.Stats.

    pstats.Stats only requires an object exposing a ``stats`` dictionary and
    a ``create_stats`` method, so wrapping precomputed stats in this class
    lets them be sent between processes where a live Profile object would
    hit pickling issues.
    """

    def __init__(self, stats):
        # Precomputed stats dictionary (as produced via pstats.Stats(...).stats)
        self.stats = stats

    def create_stats(self):
        # No-op: the stats were computed before wrapping
        pass


def parallel_mkdir(path, comm=MPI.COMM_WORLD):
    """Create a directory from the root process, synchronizing all ranks."""
    target = pathlib.Path(path)
    # Only the root rank touches the filesystem; Sync keeps the other
    # ranks from proceeding until creation is complete.
    with Sync(comm=comm, enter=False, exit=True) as sync:
        if sync.comm.rank == 0 and not target.exists():
            target.mkdir()

0 comments on commit 9cb1d35

Please sign in to comment.