From 0d956a1df6d1ebc5353e6ac0f54265028a27b59d Mon Sep 17 00:00:00 2001 From: Benjamin Bengfort Date: Fri, 15 Jul 2016 17:31:06 -0400 Subject: [PATCH 1/8] multiprocessing example --- .gitignore | 3 ++ examples/multiprocessing_example.py | 55 +++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 examples/multiprocessing_example.py diff --git a/.gitignore b/.gitignore index 7499b52..5200c61 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ MANIFEST *.egg-info *.pyc *~ + +# Ignore mprof generated files +mprofile_*.dat diff --git a/examples/multiprocessing_example.py b/examples/multiprocessing_example.py new file mode 100644 index 0000000..88299b8 --- /dev/null +++ b/examples/multiprocessing_example.py @@ -0,0 +1,55 @@ +""" +An undecorated example of a script that allocates memory in multiprocessing +workers to demonstrate the use of memory_profiler with multiple processes. + +Run this script with mprof run -C python multiprocessing_example.py +You can then visualize the usage with mprof plot. +""" + +import time +import multiprocessing as mp + +# Big numbers +X6 = 10 ** 6 +X7 = 10 ** 7 + + +def worker(num, wait, amt=X6): + """ + A function that allocates memory over time. + """ + frame = [] + + for idx in range(num): + frame.extend([1] * amt) + time.sleep(wait) + + del frame + + +def main_sequential(): + """ + A sequential version of the work, where one worker is called at a time. + """ + worker(5, 5, X6) + worker(5, 2, X7) + worker(5, 5, X6) + worker(5, 2, X7) + + +def main_multiproc(): + """ + A multiprocessing version of the work, where workers work in their own + child processes and are collected by the master process. + """ + pool = mp.Pool(processes=4) + tasks = [ + pool.apply_async(worker, args) for args in + [(5, 5, X6), (5, 2, X7), (5, 5, X6), (5, 2, X7)] + ] + + results = [p.get() for p in tasks] + + +if __name__ == '__main__': + main_multiproc() From a846fa63db2c32e0b6ce1db70176fb4e268bbdfc Mon Sep 17 00:00:00 2001 From: Benjamin Bengfort Date: Fri, 15 Jul 2016 20:07:28 -0400 Subject: [PATCH 2/8] child process memory now recorded seperately in mpmprof --- memory_profiler.py | 44 ++++++++--- mpmprof | 184 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 218 insertions(+), 10 deletions(-) create mode 100755 mpmprof diff --git a/memory_profiler.py b/memory_profiler.py index 1799a2e..7a21f55 100644 --- a/memory_profiler.py +++ b/memory_profiler.py @@ -73,6 +73,34 @@ def _repr_pretty_(self, p , cycle): p.text(u'') +def _get_child_memory(process, meminfo_attr=None): + """ + Returns a generator that yields memory for all child processes. + """ + if not has_psutil: + raise NotImplementedError(( + "The psutil module is required to monitor the " + "memory usage of child processes." + )) + + # Convert a pid to a process + if isinstance(process, int): + if process == -1: process = os.getpid() + process = psutil.Process(process) + + if not meminfo_attr: + # Use the psutil 2.0 attr if the older version isn't passed in. + meminfo_attr = 'memory_info' if hasattr(process, 'memory_info') else 'get_memory_info' + + # Select the psutil function get the children similar to how we selected + # the memory_info attr (a change from excepting the AttributeError). + children_attr = 'children' if hasattr(process, 'children') else 'get_children' + + # Loop over the child processes and yield their memory + for child in getattr(process, children_attr)(recursive=True): + yield getattr(child, meminfo_attr)()[0] / _TWO_20 + + def _get_memory(pid, timestamps=False, include_children=False): # .. only for current process and only on unix.. @@ -88,13 +116,7 @@ def _get_memory(pid, timestamps=False, include_children=False): meminfo_attr = 'memory_info' if hasattr(process, 'memory_info') else 'get_memory_info' mem = getattr(process, meminfo_attr)()[0] / _TWO_20 if include_children: - try: - for p in process.get_children(recursive=True): - mem += getattr(p, meminfo_attr)()[0] / _TWO_20 - except AttributeError: - # fix for newer psutil - for p in process.children(recursive=True): - mem += getattr(p, meminfo_attr)()[0] / _TWO_20 + mem += sum(_get_child_memory(process, meminfo_attr)) if timestamps: return (mem, time.time()) else: @@ -106,9 +128,11 @@ def _get_memory(pid, timestamps=False, include_children=False): # .. scary stuff .. if os.name == 'posix': if include_children: - raise NotImplementedError('The psutil module is required when to' - ' monitor memory usage of children' - ' processes') + raise NotImplementedError(( + "The psutil module is required to monitor the " + "memory usage of child processes." + )) + warnings.warn("psutil module not found. memory_profiler will be slow") # .. # .. memory usage in MiB .. diff --git a/mpmprof b/mpmprof new file mode 100755 index 0000000..365b909 --- /dev/null +++ b/mpmprof @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +""" +Multiprocessing version of memory profiling of Python programs. +""" + +import os +import time +import glob +import argparse +import subprocess +import memory_profiler as mp + + +# Command Descriptions and Constants +DESCRIPTION = "Multiprocessing memory profiling over time." +EPILOG = "If there are any bugs or concerns, submit an issue on Github" +VERSION = "mpmprof v{}".format(mp.__version__) +FILETIME = "%Y%m%d%H%M%S" +BLANKS = set(' \t') + + +def run_action(args): + """ + Run the given program and profile its memory usage. + """ + + # Determine where to write the output to + if args.output is None: + args.output = "mprofile_{}.dat".format( + time.strftime(FILETIME, time.localtime()) + ) + + # Determine if the command is a Python command + if args.command[0].endswith('.py') and not args.nopython: + args.python = True + + # Run the executable with the extra features + if args.python: + print("running as a Python program ...") + if not args.command[0].startswith('python'): + args.command.insert(0, 'python') + + # Inform the user we're sampling + print("mpmprof: Sampling memory every {} seconds".format(args.interval)) + + # Put the command back together from the argument parsing + command = " ".join([ + c if BLANKS.isdisjoint(c) else "'{}'".format(c) for c in args.command + ]) + + # Open a subprocess to the given command + proc = subprocess.Popen(args.command) + + # This is where a call to mp.memory_usage should go. + # Instead we're adding the custom code for sampling spawned memory + with open(args.output, "a") as f: + + # Write the command to the data file + f.write("CMDLINE {}\n".format(command)) + + # Continue sampling until the subprocess is over, counting lines + lines = 0 + while True: + # Determine if the subprocess is still running + if proc.poll() is not None: break + + # Collect memory usage of master program and write to profile + mem = mp._get_memory(proc.pid) + f.write("MEM {0:.6f} {1:.4f}\n".format(mem, time.time())) + lines += 1 + + # Collect memory usage of spawned children and write to profile + for idx, mem in enumerate(mp._get_child_memory(proc.pid)): + f.write("CHLD{0} {1:.6f} {2:.4f}\n".format(idx, mem, time.time())) + lines += 1 + + # Flush every 50 lines + if lines > 50: + lines = 0 + f.flush() + + # Sleep for the given interval + time.sleep(args.interval) + + return "memory profile written to {}".format(args.output) + + +def plot_action(args): + """ + Use matplotlib to draw the memory usage of a mprofile .dat file. + """ + raise NotImplementedError("Not implemented yet.") + + +if __name__ == '__main__': + # Create the argument parser and subparsers for each command + parser = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG) + subparsers = parser.add_subparsers(title='commands') + + # Add the version command + parser.add_argument('-v', '--version', action='version', version=VERSION) + + # Commands defined in an dictionary for easy adding + commands = ( + # Run command definition + { + 'name': 'run', + 'action': run_action, + 'help': 'monitor the memory usage of a command', + 'args': { + '--python': { + 'default': False, + 'action': 'store_true', + 'help': 'activates extra features for Python programs', + }, + '--nopython': { + 'default': False, + 'action': 'store_true', + 'help': 'disables extra features for Python programs', + }, + ('-T', '--interval'): { + 'type': float, + 'default': 0.1, + 'metavar': 'S', + 'help': 'sampling period (in seconds), defaults to 0.1', + }, + ('-o', '--output'): { + 'type': str, + 'default': None, + 'metavar': 'PATH', + 'help': 'location to write the memory profiler output to', + }, + 'command': { + 'nargs': argparse.REMAINDER, + 'help': 'command to run and profile memory usage', + } + } + }, + + # Plot command definition + { + 'name': 'plot', + 'action': plot_action, + 'help': 'plot the memory usage of a mprofile data file', + 'args': { + ('-t', '--title'): { + 'type': str, + 'default': None, + 'metavar': 'S', + 'help': 'set the title of the figure', + }, + ('-o', '--output'): { + 'type': str, + 'default': None, + 'metavar': 'PATH', + 'help': 'write the figure as a png to disk' + }, + 'profile': { + 'nargs': '*', + 'help': 'profile to plot, omit to use the latest', + } + } + } + ) + + # Add the commands and their arguments. + for cmd in commands: + # Create the command subparser and add the action + cmd_parser = subparsers.add_parser(cmd['name'], help=cmd['help']) + cmd_parser.set_defaults(func=cmd['action']) + + # Add the arguments + for args, kwargs in cmd['args'].items(): + if isinstance(args, str): + args = (args,) + cmd_parser.add_argument(*args, **kwargs) + + # Handle input from the command line + args = parser.parse_args() # Parse the arguments + # try: + msg = args.func(args) # Call the default function + parser.exit(0, msg+"\n") # Exit cleanly with message + # except Exception as e: + # parser.error(str(e)) # Exit with error From e32679b9c91c66e3eaf9ebfe6882cbf7410233b6 Mon Sep 17 00:00:00 2001 From: Benjamin Bengfort Date: Sat, 16 Jul 2016 07:14:50 -0400 Subject: [PATCH 3/8] plot function for mpmproc --- mpmprof | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/mpmprof b/mpmprof index 365b909..2ba575e 100755 --- a/mpmprof +++ b/mpmprof @@ -4,12 +4,22 @@ Multiprocessing version of memory profiling of Python programs. """ import os +import re import time import glob import argparse import subprocess import memory_profiler as mp +from collections import defaultdict + +try: + import numpy as np + import matplotlib.pyplot as plt +except ImportError: + plt = None + np = None + # Command Descriptions and Constants DESCRIPTION = "Multiprocessing memory profiling over time." @@ -82,6 +92,7 @@ def run_action(args): # Sleep for the given interval time.sleep(args.interval) + # Return the results of the run action return "memory profile written to {}".format(args.output) @@ -89,7 +100,122 @@ def plot_action(args): """ Use matplotlib to draw the memory usage of a mprofile .dat file. """ - raise NotImplementedError("Not implemented yet.") + if plt is None: + raise ImportError( + "matplotlib is needed for plotting." + ) + + def read_mprofile_file(path): + """ + Reads the specialized version of the mprofile for multiprocessing + """ + # Regular expression line parsers for parsing data + cmdre = re.compile(r"^CMDLINE\s+(.+)$") + memre = re.compile(r"^MEM\s+([\d\.e]+)\s+([\d\.e]+)$") + cldre = re.compile(r"^CHLD(\d+)\s+([\d\.e]+)\s+([\d\.e]+)$") + + # Data structure returned is a series of names (mem, ts) tuples. + series = defaultdict(list) + command = None + + with open(path, 'r') as f: + for line in f: + + # Match children memory usage lines + match = cldre.match(line) + if match: + idx, mem, ts = match.groups() + series["child " + idx].append((float(mem), float(ts))) + continue + + # Match main process memory usage lines + match = memre.match(line) + if match: + series['main'].append(tuple(map(float, match.groups()))) + continue + + # Match command line(s) + # NOTE: mprofile files are openeded for appending, could be multiple + match = cmdre.match(line) + if match: + command = match.groups()[0] + + return command, series + + + def plot_mprofile_file(path, title=None): + """ + Plots an mprofile file that contains specialized child process data. + """ + # Parse the mprofile file to get the data + command, series = read_mprofile_file(path) + title = title or command + + # Create and configure the figure + fig = plt.figure(figsize=(14, 6), dpi=90) + axe = fig.add_axes([0.1, 0.1, 0.6, 0.75]) + axe.set_xlabel("time (in seconds)") + axe.set_ylabel("memory used (in MiB)") + axe.set_title(title) + + # Find the start timestamp for the process and track the maximal memory point + # This currently assumes that the series were written in order + start = series['main'][0][1] + mpoint = (0, 0) + + # Plot all of the series, the main process and the child. + for proc, data in series.items(): + # Create the numpy arrays from the series data + ts = np.asarray([item[1] for item in data]) - start + mem = np.asarray([item[0] for item in data]) + + # Plot the line to the figure + plt.plot(ts, mem, "+-", label=proc) + + # Detect the maximal memory point + max_mem = mem.max() + if max_mem > mpoint[1]: + mpoint = (mem.argmax(), max_mem) + + # Add the marker lines for the maximal memory usage + plt.hlines(mpoint[1], plt.xlim()[0]+0.001, plt.xlim()[1] - 0.001, 'r', '--') + plt.vlines(ts[mpoint[0]], plt.ylim()[0]+0.001, plt.ylim()[1] - 0.001, 'r', '--') + + # Add the legend + legend = axe.legend(loc='center left', bbox_to_anchor=(1, 0.5)) + legend.get_frame().set_alpha(0.5) + axe.grid() + + # Get the latest profile if no profile files were passed in. + if not args.profile: + + # Glob profiles of our format and sort them. + profiles = glob.glob("mprofile_??????????????.dat") + profiles.sort() + + if not profiles: + raise ValueError(( + "No input file found.\nThis program looks for mprofile_*.dat " + "files generated by the `mpmprof run` command." + )) + + # Assign the latest profile to visualize + args.profile = profiles[0:1] + + # Filter out any files that do not exist + args.profile = list(filter(os.path.exists, args.profile)) + if not args.profile: + raise ValueError("No input files found!") + + # For each passed in file, create a figure from the mprofile. + for path in args.profile: + axe = plot_mprofile_file(path, args.title) + if args.output: + plt.savefig(args.output) + else: + plt.show() + + return "{} memory profiles plotted.".format(len(args.profile)) if __name__ == '__main__': From aab15edbc9f45968f1faf54a92a539b7a8a946bb Mon Sep 17 00:00:00 2001 From: Benjamin Bengfort Date: Mon, 20 Mar 2017 09:39:32 -0400 Subject: [PATCH 4/8] merge mpmprof into mprof --- examples/multiprocessing_example.py | 2 +- memory_profiler.py | 26 ++- mpmprof | 310 ---------------------------- mprof | 31 ++- 4 files changed, 53 insertions(+), 316 deletions(-) delete mode 100755 mpmprof diff --git a/examples/multiprocessing_example.py b/examples/multiprocessing_example.py index 88299b8..9818ede 100644 --- a/examples/multiprocessing_example.py +++ b/examples/multiprocessing_example.py @@ -2,7 +2,7 @@ An undecorated example of a script that allocates memory in multiprocessing workers to demonstrate the use of memory_profiler with multiple processes. -Run this script with mprof run -C python multiprocessing_example.py +Run this script with mprof run -M python multiprocessing_example.py You can then visualize the usage with mprof plot. """ diff --git a/memory_profiler.py b/memory_profiler.py index 5039727..c9c1110 100644 --- a/memory_profiler.py +++ b/memory_profiler.py @@ -240,8 +240,8 @@ def run(self): def memory_usage(proc=-1, interval=.1, timeout=None, timestamps=False, - include_children=False, max_usage=False, retval=False, - stream=None, backend=None): + include_children=False, multiprocess=False, max_usage=False, + retval=False, stream=None, backend=None): """ Return the memory usage of a process or piece of code @@ -272,6 +272,12 @@ def memory_usage(proc=-1, interval=.1, timeout=None, timestamps=False, timestamps : bool, optional if True, timestamps of memory usage measurement are collected as well. + include_children : bool, optional + if True, sum the memory of all forked processes as well + + multiprocess : bool, optional + if True, track the memory usage of all forked processes. + stream : File if stream is a File opened with write access, then results are written to this file instead of stored in memory and returned at the end of @@ -343,10 +349,18 @@ def memory_usage(proc=-1, interval=.1, timeout=None, timestamps=False, mem_usage = _get_memory( proc.pid, backend, timestamps=timestamps, include_children=include_children) + if stream is not None: stream.write("MEM {0:.6f} {1:.4f}\n".format(*mem_usage)) + + # Only write children to the stream file, warn if appending to the return. + if multiprocess: + for idx, chldmem in enumerate(_get_child_memory(proc.pid)): + stream.write("CHLD {0} {1:.6f} {2:.4f}\n".format(idx, chldmem, time.time())) else: ret.append(mem_usage) + if multiprocess: + warnings.warn("use include_children not multiprocess without a stream") else: ret = max(ret, _get_memory( @@ -377,8 +391,16 @@ def memory_usage(proc=-1, interval=.1, timeout=None, timestamps=False, include_children=include_children) if stream is not None: stream.write("MEM {0:.6f} {1:.4f}\n".format(*mem_usage)) + + # Only write children to the stream file, warn if appending to the return. + if multiprocess: + for idx, chldmem in enumerate(_get_child_memory(proc.pid)): + stream.write("CHLD {0} {1:.6f} {2:.4f}\n".format(idx, chldmem, time.time())) else: ret.append(mem_usage) + + if multiprocess: + warnings.warn("use include_children not multiprocess without a stream") else: ret = max([ret, _get_memory(proc, backend, include_children=include_children) diff --git a/mpmprof b/mpmprof deleted file mode 100755 index 2ba575e..0000000 --- a/mpmprof +++ /dev/null @@ -1,310 +0,0 @@ -#!/usr/bin/env python3 -""" -Multiprocessing version of memory profiling of Python programs. -""" - -import os -import re -import time -import glob -import argparse -import subprocess -import memory_profiler as mp - -from collections import defaultdict - -try: - import numpy as np - import matplotlib.pyplot as plt -except ImportError: - plt = None - np = None - - -# Command Descriptions and Constants -DESCRIPTION = "Multiprocessing memory profiling over time." -EPILOG = "If there are any bugs or concerns, submit an issue on Github" -VERSION = "mpmprof v{}".format(mp.__version__) -FILETIME = "%Y%m%d%H%M%S" -BLANKS = set(' \t') - - -def run_action(args): - """ - Run the given program and profile its memory usage. - """ - - # Determine where to write the output to - if args.output is None: - args.output = "mprofile_{}.dat".format( - time.strftime(FILETIME, time.localtime()) - ) - - # Determine if the command is a Python command - if args.command[0].endswith('.py') and not args.nopython: - args.python = True - - # Run the executable with the extra features - if args.python: - print("running as a Python program ...") - if not args.command[0].startswith('python'): - args.command.insert(0, 'python') - - # Inform the user we're sampling - print("mpmprof: Sampling memory every {} seconds".format(args.interval)) - - # Put the command back together from the argument parsing - command = " ".join([ - c if BLANKS.isdisjoint(c) else "'{}'".format(c) for c in args.command - ]) - - # Open a subprocess to the given command - proc = subprocess.Popen(args.command) - - # This is where a call to mp.memory_usage should go. - # Instead we're adding the custom code for sampling spawned memory - with open(args.output, "a") as f: - - # Write the command to the data file - f.write("CMDLINE {}\n".format(command)) - - # Continue sampling until the subprocess is over, counting lines - lines = 0 - while True: - # Determine if the subprocess is still running - if proc.poll() is not None: break - - # Collect memory usage of master program and write to profile - mem = mp._get_memory(proc.pid) - f.write("MEM {0:.6f} {1:.4f}\n".format(mem, time.time())) - lines += 1 - - # Collect memory usage of spawned children and write to profile - for idx, mem in enumerate(mp._get_child_memory(proc.pid)): - f.write("CHLD{0} {1:.6f} {2:.4f}\n".format(idx, mem, time.time())) - lines += 1 - - # Flush every 50 lines - if lines > 50: - lines = 0 - f.flush() - - # Sleep for the given interval - time.sleep(args.interval) - - # Return the results of the run action - return "memory profile written to {}".format(args.output) - - -def plot_action(args): - """ - Use matplotlib to draw the memory usage of a mprofile .dat file. - """ - if plt is None: - raise ImportError( - "matplotlib is needed for plotting." - ) - - def read_mprofile_file(path): - """ - Reads the specialized version of the mprofile for multiprocessing - """ - # Regular expression line parsers for parsing data - cmdre = re.compile(r"^CMDLINE\s+(.+)$") - memre = re.compile(r"^MEM\s+([\d\.e]+)\s+([\d\.e]+)$") - cldre = re.compile(r"^CHLD(\d+)\s+([\d\.e]+)\s+([\d\.e]+)$") - - # Data structure returned is a series of names (mem, ts) tuples. - series = defaultdict(list) - command = None - - with open(path, 'r') as f: - for line in f: - - # Match children memory usage lines - match = cldre.match(line) - if match: - idx, mem, ts = match.groups() - series["child " + idx].append((float(mem), float(ts))) - continue - - # Match main process memory usage lines - match = memre.match(line) - if match: - series['main'].append(tuple(map(float, match.groups()))) - continue - - # Match command line(s) - # NOTE: mprofile files are openeded for appending, could be multiple - match = cmdre.match(line) - if match: - command = match.groups()[0] - - return command, series - - - def plot_mprofile_file(path, title=None): - """ - Plots an mprofile file that contains specialized child process data. - """ - # Parse the mprofile file to get the data - command, series = read_mprofile_file(path) - title = title or command - - # Create and configure the figure - fig = plt.figure(figsize=(14, 6), dpi=90) - axe = fig.add_axes([0.1, 0.1, 0.6, 0.75]) - axe.set_xlabel("time (in seconds)") - axe.set_ylabel("memory used (in MiB)") - axe.set_title(title) - - # Find the start timestamp for the process and track the maximal memory point - # This currently assumes that the series were written in order - start = series['main'][0][1] - mpoint = (0, 0) - - # Plot all of the series, the main process and the child. - for proc, data in series.items(): - # Create the numpy arrays from the series data - ts = np.asarray([item[1] for item in data]) - start - mem = np.asarray([item[0] for item in data]) - - # Plot the line to the figure - plt.plot(ts, mem, "+-", label=proc) - - # Detect the maximal memory point - max_mem = mem.max() - if max_mem > mpoint[1]: - mpoint = (mem.argmax(), max_mem) - - # Add the marker lines for the maximal memory usage - plt.hlines(mpoint[1], plt.xlim()[0]+0.001, plt.xlim()[1] - 0.001, 'r', '--') - plt.vlines(ts[mpoint[0]], plt.ylim()[0]+0.001, plt.ylim()[1] - 0.001, 'r', '--') - - # Add the legend - legend = axe.legend(loc='center left', bbox_to_anchor=(1, 0.5)) - legend.get_frame().set_alpha(0.5) - axe.grid() - - # Get the latest profile if no profile files were passed in. - if not args.profile: - - # Glob profiles of our format and sort them. - profiles = glob.glob("mprofile_??????????????.dat") - profiles.sort() - - if not profiles: - raise ValueError(( - "No input file found.\nThis program looks for mprofile_*.dat " - "files generated by the `mpmprof run` command." - )) - - # Assign the latest profile to visualize - args.profile = profiles[0:1] - - # Filter out any files that do not exist - args.profile = list(filter(os.path.exists, args.profile)) - if not args.profile: - raise ValueError("No input files found!") - - # For each passed in file, create a figure from the mprofile. - for path in args.profile: - axe = plot_mprofile_file(path, args.title) - if args.output: - plt.savefig(args.output) - else: - plt.show() - - return "{} memory profiles plotted.".format(len(args.profile)) - - -if __name__ == '__main__': - # Create the argument parser and subparsers for each command - parser = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG) - subparsers = parser.add_subparsers(title='commands') - - # Add the version command - parser.add_argument('-v', '--version', action='version', version=VERSION) - - # Commands defined in an dictionary for easy adding - commands = ( - # Run command definition - { - 'name': 'run', - 'action': run_action, - 'help': 'monitor the memory usage of a command', - 'args': { - '--python': { - 'default': False, - 'action': 'store_true', - 'help': 'activates extra features for Python programs', - }, - '--nopython': { - 'default': False, - 'action': 'store_true', - 'help': 'disables extra features for Python programs', - }, - ('-T', '--interval'): { - 'type': float, - 'default': 0.1, - 'metavar': 'S', - 'help': 'sampling period (in seconds), defaults to 0.1', - }, - ('-o', '--output'): { - 'type': str, - 'default': None, - 'metavar': 'PATH', - 'help': 'location to write the memory profiler output to', - }, - 'command': { - 'nargs': argparse.REMAINDER, - 'help': 'command to run and profile memory usage', - } - } - }, - - # Plot command definition - { - 'name': 'plot', - 'action': plot_action, - 'help': 'plot the memory usage of a mprofile data file', - 'args': { - ('-t', '--title'): { - 'type': str, - 'default': None, - 'metavar': 'S', - 'help': 'set the title of the figure', - }, - ('-o', '--output'): { - 'type': str, - 'default': None, - 'metavar': 'PATH', - 'help': 'write the figure as a png to disk' - }, - 'profile': { - 'nargs': '*', - 'help': 'profile to plot, omit to use the latest', - } - } - } - ) - - # Add the commands and their arguments. - for cmd in commands: - # Create the command subparser and add the action - cmd_parser = subparsers.add_parser(cmd['name'], help=cmd['help']) - cmd_parser.set_defaults(func=cmd['action']) - - # Add the arguments - for args, kwargs in cmd['args'].items(): - if isinstance(args, str): - args = (args,) - cmd_parser.add_argument(*args, **kwargs) - - # Handle input from the command line - args = parser.parse_args() # Parse the arguments - # try: - msg = args.func(args) # Call the default function - parser.exit(0, msg+"\n") # Exit cleanly with message - # except Exception as e: - # parser.error(str(e)) # Exit with error diff --git a/mprof b/mprof index 192ba10..3c81a01 100755 --- a/mprof +++ b/mprof @@ -9,6 +9,7 @@ import copy import time import math +from collections import defaultdict from optparse import OptionParser, OptionValueError import memory_profiler as mp @@ -195,6 +196,9 @@ def run_action(): parser.add_option("--include-children", "-C", dest="include_children", default=False, action="store_true", help="""Monitors forked processes as well (sum up all process memory)""") + parser.add_option("--multiprocess", "-M", dest="multiprocess", + default=False, action="store_true", + help="""Monitors forked processes creating individual plots for each child""") (options, args) = parser.parse_args() @@ -231,7 +235,8 @@ def run_action(): with open(mprofile_output, "a") as f: f.write("CMDLINE {0}\n".format(cmd_line)) mp.memory_usage(proc=p, interval=options.interval, timestamps=True, - include_children=options.include_children, stream=f) + include_children=options.include_children, + multiprocess=options.multiprocess, stream=f) def add_brackets(xloc, yloc, xshift=0, color="r", label=None, options=None): @@ -299,6 +304,7 @@ def read_mprofile_file(filename): func_ts = {} mem_usage = [] timestamp = [] + children = defaultdict(list) cmd_line = None f = open(filename, "r") for l in f: @@ -319,6 +325,13 @@ def read_mprofile_file(filename): float(mem_start), float(mem_end)]) func_ts[f_name] = ts + elif field == "CHLD": + values = value.split(' ') + chldnum = values[0] + children[chldnum].append( + (float(values[1]), float(values[2])) + ) + elif field == "CMDLINE": cmd_line = value else: @@ -327,10 +340,10 @@ def read_mprofile_file(filename): return {"mem_usage": mem_usage, "timestamp": timestamp, "func_timestamp": func_ts, 'filename': filename, - 'cmd_line': cmd_line} + 'cmd_line': cmd_line, 'children': children} -def plot_file(filename, index=0, timestamps=True, options=None): +def plot_file(filename, index=0, timestamps=True, children=True, options=None): try: import pylab as pl except ImportError: @@ -351,6 +364,7 @@ def plot_file(filename, index=0, timestamps=True, options=None): ts = mprofile['func_timestamp'] t = mprofile['timestamp'] mem = mprofile['mem_usage'] + chld = mprofile['children'] if len(ts) > 0: for values in ts.values(): @@ -384,6 +398,17 @@ def plot_file(filename, index=0, timestamps=True, options=None): bottom += 0.001 top -= 0.001 + # plot children, if any + if len(chld) > 0 and children: + for idx, (proc, data) in enumerate(chld.items()): + # Create the numpy arrays from the series data + cts = np.asarray([item[1] for item in data]) - global_start + cmem = np.asarray([item[0] for item in data]) + + # Plot the line to the figure + pl.plot(cts, cmem, "+-" + mem_line_colors[idx+1 % len(mem_line_colors)], + label="child {}".format(proc)) + # plot timestamps, if any if len(ts) > 0 and timestamps: func_num = 0 From f1cee23af020607ff0c76f23db7f902752dd8d6a Mon Sep 17 00:00:00 2001 From: Benjamin Bengfort Date: Mon, 20 Mar 2017 11:34:24 -0400 Subject: [PATCH 5/8] added max child usage marker and update readme --- README.rst | 51 +++++++++++++++++++++++++++++++++++++++++++-------- mprof | 11 +++++++++++ 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index 1cbfee1..3c34ad3 100644 --- a/README.rst +++ b/README.rst @@ -107,10 +107,10 @@ decorator function. Use as follows:: del b return a -If a python script with decorator ``@profile`` is called using ``-m +If a python script with decorator ``@profile`` is called using ``-m memory_profiler`` in the command line, the ``precision`` parameter is ignored. -Time-based memory usage +Time-based memory usage ========================== Sometimes it is useful to have full memory usage reports as a function of time (not line-by-line) of external processes (be it Python scripts or not). @@ -131,14 +131,14 @@ e.g. `mprof run -h`. In the case of a Python script, using the previous command does not give you any information on which function is executed at a given time. Depending on the case, it can be difficult to identify the part -of the code that is causing the highest memory usage. +of the code that is causing the highest memory usage. Adding the `profile` decorator to a function and running the Python -script with +script with mprof run