WIP: Independent child process monitoring #118 #134

Merged 9 commits on Mar 21, 2017
Changes from 4 commits
3 changes: 3 additions & 0 deletions .gitignore
@@ -5,3 +5,6 @@ MANIFEST
*.egg-info
*.pyc
*~

# Ignore mprof generated files
mprofile_*.dat
55 changes: 55 additions & 0 deletions examples/multiprocessing_example.py
@@ -0,0 +1,55 @@
"""
An undecorated example of a script that allocates memory in multiprocessing
workers to demonstrate the use of memory_profiler with multiple processes.

Run this script with "mprof run -C python multiprocessing_example.py".
You can then visualize the usage with "mprof plot".
"""

import time
import multiprocessing as mp

# Big numbers
X6 = 10 ** 6
X7 = 10 ** 7


def worker(num, wait, amt=X6):
    """
    A function that allocates memory over time.
    """
    frame = []

    for idx in range(num):
        frame.extend([1] * amt)
        time.sleep(wait)

    del frame


def main_sequential():
    """
    A sequential version of the work, where one worker is called at a time.
    """
    worker(5, 5, X6)
    worker(5, 2, X7)
    worker(5, 5, X6)
    worker(5, 2, X7)


def main_multiproc():
    """
    A multiprocessing version of the work, where workers work in their own
    child processes and are collected by the master process.
    """
    pool = mp.Pool(processes=4)
    tasks = [
        pool.apply_async(worker, args) for args in
        [(5, 5, X6), (5, 2, X7), (5, 5, X6), (5, 2, X7)]
    ]

    results = [p.get() for p in tasks]


if __name__ == '__main__':
    main_multiproc()
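
As a usage aside (not part of this PR), the same workload can also be sampled programmatically; a minimal sketch, assuming memory_profiler is importable and that main_multiproc from the example above is in scope:

from memory_profiler import memory_usage

# Sample every 0.5 s while main_multiproc runs; include_children=True folds
# the workers' memory into each sample instead of reporting only the parent.
samples = memory_usage((main_multiproc, (), {}),
                       interval=0.5, include_children=True)
print('peak memory (MiB): %.1f' % max(samples))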
47 changes: 38 additions & 9 deletions memory_profiler.py
@@ -86,6 +86,38 @@ def _repr_pretty_(self, p, cycle):
        p.text(u'<MemitResult : ' + msg + u'>')


def _get_child_memory(process, meminfo_attr=None):
    """
    Returns a generator that yields memory usage (in MiB) for all child processes.
    """
    if not has_psutil:
        raise NotImplementedError((
            "The psutil module is required to monitor the "
            "memory usage of child processes."
        ))

    # Convert a pid to a process
    if isinstance(process, int):
        if process == -1:
            process = os.getpid()
        process = psutil.Process(process)

    if not meminfo_attr:
        # Use the psutil 2.0 attr if the older version isn't passed in.
        meminfo_attr = 'memory_info' if hasattr(process, 'memory_info') else 'get_memory_info'

    # Select the psutil function to get the children, similar to how we
    # selected the memory_info attr (a change from catching the AttributeError).
    children_attr = 'children' if hasattr(process, 'children') else 'get_children'

    # Loop over the child processes and yield their memory
    try:
        for child in getattr(process, children_attr)(recursive=True):
            yield getattr(child, meminfo_attr)()[0] / _TWO_20
    except psutil.NoSuchProcess:
        # https://github.com/fabianp/memory_profiler/issues/71
        yield 0.0

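As an aside (not part of the diff), the generator above can also be consumed on its own; an illustrative sketch, assuming psutil is installed and the module-level os import is available:

# Illustrative only: aggregate and per-child memory of the current process, in MiB.
total_mib = sum(_get_child_memory(os.getpid()))  # accepts a pid or a psutil.Process
per_child = list(_get_child_memory(-1))          # -1 also means "the current process"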

def _get_memory(pid, backend, timestamps=False, include_children=False, filename=None):
# .. low function to get memory consumption ..
if pid == -1:
@@ -111,12 +143,7 @@ def ps_util_tool():
else 'get_memory_info'
mem = getattr(process, meminfo_attr)()[0] / _TWO_20
if include_children:
try:
for p in process.children(recursive=True):
mem += getattr(p, meminfo_attr)()[0] / _TWO_20
except psutil.NoSuchProcess:
# https://github.com/fabianp/memory_profiler/issues/71
pass
mem += sum(_get_child_memory(process, meminfo_attr))


Hi, quick question: since processes on modern Linux share their parent's memory after forking and only copy pages on write, wouldn't this sum report the wrong number?

Contributor Author


Unfortunately I'm not sure of the behavior of psutil with regard to Linux forks; I simply used the same meminfo_attr as the summation function. If that's true then this is indeed a problem, but I'd guess that we'd have to dig into psutil to figure it out.
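
For what it's worth, psutil >= 4.0 exposes memory_full_info(), whose uss field counts only pages unique to each process (so copy-on-write pages still shared with the parent are not attributed to the children), and on Linux also pss, which splits shared pages proportionally. A hedged sketch of what a USS-based child summation could look like; the helper name _sum_child_uss is made up here, this is not what the PR does, and memory_full_info() is noticeably slower than memory_info() and may need extra privileges for processes you don't own:

import os
import psutil

def _sum_child_uss(pid=None):
    """Sum the unique set size (USS) of all child processes, in MiB."""
    parent = psutil.Process(pid if pid is not None else os.getpid())
    total = 0.0
    for child in parent.children(recursive=True):
        try:
            total += child.memory_full_info().uss / float(2 ** 20)
        except psutil.NoSuchProcess:
            # A child may exit between enumeration and inspection.
            pass
    return total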

if timestamps:
return mem, time.time()
else:
@@ -128,9 +155,11 @@ def ps_util_tool():
def posix_tool():
# .. scary stuff ..
if include_children:
raise NotImplementedError('The psutil module is required when to'
' monitor memory usage of children'
' processes')
raise NotImplementedError((
Contributor Author


Sorry about this, I think this was just a carry over from the historical merge; I can reset back to the original if needed.

"The psutil module is required to monitor the "
"memory usage of child processes."
))

warnings.warn("psutil module not found. memory_profiler will be slow")
# ..
# .. memory usage in MiB ..