From 4c3d65a81f5ca2889cef1238f7876a96d9396150 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Fri, 2 Jun 2023 14:54:35 -0700 Subject: [PATCH 01/17] Feat: Basic implementation of memray profiler for mrun --- src/maggma/cli/__init__.py | 50 ++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/src/maggma/cli/__init__.py b/src/maggma/cli/__init__.py index 899ce1a34..f91562833 100644 --- a/src/maggma/cli/__init__.py +++ b/src/maggma/cli/__init__.py @@ -6,6 +6,7 @@ import logging import sys from itertools import chain +from contextlib import nullcontext import click from monty.serialization import loadfn @@ -16,8 +17,10 @@ from maggma.cli.source_loader import ScriptFinder, load_builder_from_source from maggma.utils import ReportingHandler, TqdmLoggingHandler +from memray import Tracker, FileDestination + sys.meta_path.append(ScriptFinder()) - + @click.command() @click.argument("builders", nargs=-1, type=click.Path(exists=True), required=True) @@ -44,14 +47,17 @@ help="Store in JSON/YAML form to send reporting data to", type=click.Path(exists=True), ) -@click.option("-u", "--url", "url", default=None, type=str, help="URL for the distributed manager") +@click.option( + "-u", "--url", "url", default=None, type=str, help="URL for the distributed manager" +) @click.option( "-p", "--port", "port", default=None, type=int, - help="Port for distributed communication." " mrun will find an open port if None is provided to the manager", + help="Port for distributed communication." 
+ " mrun will find an open port if None is provided to the manager", ) @click.option( "-N", @@ -69,8 +75,12 @@ type=int, help="Number of distributed workers to process chunks", ) -@click.option("--no_bars", is_flag=True, help="Turns of Progress Bars for headless operations") -@click.option("--rabbitmq", is_flag=True, help="Enables the use of RabbitMQ as the work broker") +@click.option( + "--no_bars", is_flag=True, help="Turns of Progress Bars for headless operations" +) +@click.option( + "--rabbitmq", is_flag=True, help="Enables the use of RabbitMQ as the work broker" +) @click.option( "-q", "--queue_prefix", @@ -79,7 +89,17 @@ type=str, help="Prefix to use in queue names when RabbitMQ is select as the broker", ) +@click.option( + "-m", + "--memray", + "memray", + default=False, + type=bool, + help="Option to profile builder memory usage with Memray", +) +@click.pass_context def run( + ctx, builders, verbosity, reporting_store, @@ -91,7 +111,19 @@ def run( num_processes, rabbitmq, queue_prefix, + memray, ): + if memray: + ctx.obj = ctx.with_resource( + Tracker( + destination=FileDestination( + "/home/tsmathis/dev/fermi_builder/memray_logs/log_test.bin" + ), + native_traces=False, + trace_python_allocators=False, + follow_fork=False, + ) + ) # Import proper manager and worker if rabbitmq: from maggma.cli.rabbitmq import manager, worker @@ -104,7 +136,9 @@ def run( root = logging.getLogger() root.setLevel(level) ch = TqdmLoggingHandler() - formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) ch.setFormatter(formatter) root.addHandler(ch) @@ -167,4 +201,6 @@ def run( else: loop = asyncio.get_event_loop() for builder in builder_objects: - loop.run_until_complete(multi(builder=builder, num_processes=num_processes, no_bars=no_bars)) + loop.run_until_complete( + multi(builder=builder, num_processes=num_processes, no_bars=no_bars) + ) From 
4adddc9f2507e928f460bd7a4269433c1ac7de0d Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Fri, 2 Jun 2023 17:49:01 -0700 Subject: [PATCH 02/17] Enable memray to profile forked processes --- src/maggma/cli/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/maggma/cli/__init__.py b/src/maggma/cli/__init__.py index f91562833..40f108c47 100644 --- a/src/maggma/cli/__init__.py +++ b/src/maggma/cli/__init__.py @@ -6,7 +6,6 @@ import logging import sys from itertools import chain -from contextlib import nullcontext import click from monty.serialization import loadfn @@ -114,14 +113,17 @@ def run( memray, ): if memray: + follow_fork = False + if num_processes > 1: + follow_fork = True ctx.obj = ctx.with_resource( Tracker( destination=FileDestination( - "/home/tsmathis/dev/fermi_builder/memray_logs/log_test.bin" + "/home/tsmathis/dev/fermi_builder/memray_logs/mp_log_test.bin" ), native_traces=False, trace_python_allocators=False, - follow_fork=False, + follow_fork=follow_fork, ) ) # Import proper manager and worker From 220eecaa99daef43b4b60442785446312ae8a9c0 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Tue, 6 Jun 2023 15:32:43 -0700 Subject: [PATCH 03/17] Update cli settings to get system's temp directory to dump .bin files for memory profiler --- src/maggma/cli/settings.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/maggma/cli/settings.py b/src/maggma/cli/settings.py index 674b6f3a0..781406013 100644 --- a/src/maggma/cli/settings.py +++ b/src/maggma/cli/settings.py @@ -1,8 +1,13 @@ +from pathlib import Path +import platform +import tempfile + from pydantic import BaseSettings, Field +tempdir = "/tmp" if platform.system() == "Darwin" else tempfile.gettempdir() -class CLISettings(BaseSettings): +class CLISettings(BaseSettings): WORKER_TIMEOUT: int = Field( 3600, description="Timeout in seconds for a 
distributed worker", @@ -13,6 +18,11 @@ class CLISettings(BaseSettings): description="Timeout in seconds for the worker manager", ) + TEMP_DIR: str = Field( + tempdir, + description="Directory that memory profile .bin files are dumped to", + ) + class Config: env_prefix = "MAGGMA_" extra = "ignore" From 8482705df3a1d63f211fdfc6b74db38a04d2e8a1 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Tue, 6 Jun 2023 15:33:57 -0700 Subject: [PATCH 04/17] Update requirements.txt and setup.py to include memray dependency for memory profiling --- requirements.txt | 1 + setup.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 938e2affa..c9fd1162d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,3 +19,4 @@ orjson==3.8.0 boto3==1.24.42 python-dateutil==2.8.2 pydantic +memray==1.7.0 diff --git a/setup.py b/setup.py index 3de499302..c8e65808f 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,8 @@ "msgpack>=0.5.6", "orjson>=3.6.0", "boto3>=1.20.41", - "python-dateutil>=2.8.2" + "python-dateutil>=2.8.2", + "memray>=1.7.0", ], extras_require={ "vault": ["hvac>=0.9.5"], From 2e4c2aec28d8e7de535a1ae30791af0c33860ecc Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Tue, 6 Jun 2023 16:23:55 -0700 Subject: [PATCH 05/17] Update memray .bin file write destination to default to the user's platform specific temp directory --- src/maggma/cli/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/maggma/cli/__init__.py b/src/maggma/cli/__init__.py index 40f108c47..65fa3f03f 100644 --- a/src/maggma/cli/__init__.py +++ b/src/maggma/cli/__init__.py @@ -6,6 +6,7 @@ import logging import sys from itertools import chain +from datetime import datetime import click from monty.serialization import loadfn @@ -14,12 +15,15 @@ from maggma.cli.multiprocessing import multi from maggma.cli.serial import serial from 
maggma.cli.source_loader import ScriptFinder, load_builder_from_source +from maggma.cli.settings import CLISettings from maggma.utils import ReportingHandler, TqdmLoggingHandler from memray import Tracker, FileDestination sys.meta_path.append(ScriptFinder()) +settings = CLISettings() + @click.command() @click.argument("builders", nargs=-1, type=click.Path(exists=True), required=True) @@ -119,7 +123,7 @@ def run( ctx.obj = ctx.with_resource( Tracker( destination=FileDestination( - "/home/tsmathis/dev/fermi_builder/memray_logs/mp_log_test.bin" + f"{settings.TEMP_DIR}/{builders[0]}_{datetime.now().isoformat()}.bin" ), native_traces=False, trace_python_allocators=False, From ce667902d9399d8ec84d34780dcd6b41cd45729e Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Fri, 9 Jun 2023 11:04:20 -0700 Subject: [PATCH 06/17] Reverts changes in commit 8482705df3a1d63f211fdfc6b74db38a04d2e8a1, removing memray dependancy from requiremnets.txt and setup.py --- requirements.txt | 1 - setup.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 324efe53f..3568d96a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,5 +19,4 @@ orjson==3.8.14 boto3==1.24.42 python-dateutil==2.8.2 pydantic -memray==1.7.0 ruamel.yaml<0.18 diff --git a/setup.py b/setup.py index 647d25714..f0c60dab2 100644 --- a/setup.py +++ b/setup.py @@ -46,13 +46,12 @@ "orjson>=3.6.0", "boto3>=1.20.41", "python-dateutil>=2.8.2", - "memray>=1.7.0", ], extras_require={ "vault": ["hvac>=0.9.5"], "montydb": ["montydb>=2.3.12"], "notebook_runner": ["IPython>=7.16", "nbformat>=5.0", "regex>=2020.6"], - "azure": ["azure-storage-blob>=12.16.0", "azure-identity>=1.12.0"] + "azure": ["azure-storage-blob>=12.16.0", "azure-identity>=1.12.0"], }, classifiers=[ "Programming Language :: Python :: 3", From 160fcd91fe080969327ece4d75f5327b954318ce Mon Sep 17 00:00:00 2001 From: Tyler Mathis 
<35553152+tsmathis@users.noreply.github.com> Date: Fri, 9 Jun 2023 11:06:37 -0700 Subject: [PATCH 07/17] Add memray to requirements-optional.txt to keep base maggma installation lightweight --- requirements-optional.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-optional.txt b/requirements-optional.txt index 972c2eb54..02fbf826b 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -6,3 +6,4 @@ regex==2022.9.13 montydb==2.4.0 azure-storage-blob==12.16.0 azure-identity==1.12.0 +memray==1.7.0 From 30b6649f19da0ca430530bdf23c1c748da685e90 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Fri, 9 Jun 2023 11:39:52 -0700 Subject: [PATCH 08/17] Ensure section headings are all title case --- docs/getting_started/running_builders.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started/running_builders.md b/docs/getting_started/running_builders.md index a5f44c0aa..e3b10bd9b 100644 --- a/docs/getting_started/running_builders.md +++ b/docs/getting_started/running_builders.md @@ -100,7 +100,7 @@ mrun -n 2 -v my_builder.py ``` -## Running multiple builders +## Running Multiple Builders `mrun` can run multiple builders. You can have multiple builders in a single file: `json`, `python`, or `jupyter-notebook`. 
Or you can chain multiple files in the order you want to run them: ``` shell From c17829d7a81526f7a1ee4885847e0ba29e3768b6 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Fri, 9 Jun 2023 14:01:15 -0700 Subject: [PATCH 09/17] Automatically generate flamegraph after builder finishes for user convenience --- src/maggma/cli/__init__.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/maggma/cli/__init__.py b/src/maggma/cli/__init__.py index 65fa3f03f..a98ce2ae2 100644 --- a/src/maggma/cli/__init__.py +++ b/src/maggma/cli/__init__.py @@ -118,13 +118,17 @@ def run( ): if memray: follow_fork = False + if num_processes > 1: follow_fork = True + + memray_file = ( + f"{settings.TEMP_DIR}/{builders[0]}_{datetime.now().isoformat()}.bin" + ) + ctx.obj = ctx.with_resource( Tracker( - destination=FileDestination( - f"{settings.TEMP_DIR}/{builders[0]}_{datetime.now().isoformat()}.bin" - ), + destination=FileDestination(memray_file), native_traces=False, trace_python_allocators=False, follow_fork=follow_fork, @@ -210,3 +214,8 @@ def run( loop.run_until_complete( multi(builder=builder, num_processes=num_processes, no_bars=no_bars) ) + + if memray_file: + import subprocess + + subprocess.run(["memray", "flamegraph", memray_file], shell=False) From 95a32ab4ba8337d553eccf613637d2ad83d3d381 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Fri, 9 Jun 2023 14:49:53 -0700 Subject: [PATCH 10/17] Add Memory Profiling section to Running Builders documentation --- docs/getting_started/running_builders.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/getting_started/running_builders.md b/docs/getting_started/running_builders.md index e3b10bd9b..54f07df7d 100644 --- a/docs/getting_started/running_builders.md +++ b/docs/getting_started/running_builders.md @@ -119,3 +119,23 @@ mrun -n 32 -vv my_first_builder.json builder_2_and_3.py 
last_builder.ipynb * `BUILD_ENDED` - This event tells us the build process finished this specific builder. It also indicates the total number of `errors` and `warnings` that were caught during the process. These event docs also contain the `builder`, a `build_id` which is unique for each time a builder is run and anonymous but unique ID for the machine the builder was run on. + + +## Profiling Memory Usage of Builders + +`mrun` can optionally profile the memory usage of a running builder by using the Memray Python memory profiling tool ([Memray](https://github.com/bloomberg/memray)). To get started, `maggma` will first need to be installed from source ([Maggma installation](https://materialsproject.github.io/maggma/#installation-from-source)) followed by `pip` installing Memray using `pip install memray`, or by installing the optional `maggma` requirements by using `pip install requirements-optional.txt` in the `maggma` base directory. + +Setting the `--memray` (`-m`) option to `on`, or `True`, will signal `mrun` to profile the memory usage of any builders passed to `mrun` as the builders are running. The profiler also supports profiling of both single and forked processes. For example, spawning multiple processes in `mrun` with `-n` will signal the profiler to track any forked child processes spawned from the parent process. + +A basic invocation of the memory profiler using the `mrun` command line tool would look like this: +``` shell +mrun --memray on my_builder.json +``` + +The profiler will generate two files after the builder finishes: +1. An output `.bin` file that is dumped by default into the `temp` directory, which is platform/OS dependent. For Linux/macOS this will be `/tmp/` and for Windows the target directory will be `C:\TEMP\`. The output file will have a generic naming pattern as follows: `BUILDER_NAME_PASSED_TO_MRUN + BUILDER_START_DATETIME_ISO.bin`, e.g., `my_builder.json_2023-06-09T13:57:48.446361.bin`. +2.
A `.html` flamegraph file that will be written to the same directory as the `.bin` dump file. The flamegraph will have a naming pattern similar to the following: `memray-flamegraph-my_builder.json_2023-06-09T13:57:48.446361.html`. The flamegraph can be viewed using any web browser. + +***Note***: Different platforms/operating systems purge their system's `temp` directory at different intervals. It is recommended to move at least the `.bin` file to a more stable location. The `.bin` file can be used to recreate the flamegraph at anytime using the Memray CLI. + +Further data visualization and transform examples can be found in Memray's documentation ([Memray reporters](https://bloomberg.github.io/memray/live.html)). \ No newline at end of file From 6e25bb7d9759f9713873fbaaddf799182507b2c9 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Fri, 9 Jun 2023 17:18:38 -0700 Subject: [PATCH 11/17] Add option for user to supply target directory for profiler output files --- src/maggma/cli/__init__.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/maggma/cli/__init__.py b/src/maggma/cli/__init__.py index a98ce2ae2..be4cb1a44 100644 --- a/src/maggma/cli/__init__.py +++ b/src/maggma/cli/__init__.py @@ -18,7 +18,6 @@ from maggma.cli.settings import CLISettings from maggma.utils import ReportingHandler, TqdmLoggingHandler -from memray import Tracker, FileDestination sys.meta_path.append(ScriptFinder()) @@ -100,6 +99,14 @@ type=bool, help="Option to profile builder memory usage with Memray", ) +@click.option( + "-md", + "--memray-dir", + "memray_dir", + default=None, + type=str, + help="Directory to dump memory profiler output files. Only runs if --memray is True. Will create directory if directory does not exist, mimicking mkdir -p command. 
If not provided files will be dumped to system's temp directory", +) @click.pass_context def run( ctx, @@ -115,17 +122,29 @@ def run( rabbitmq, queue_prefix, memray, + memray_dir, + memray_file=None, + follow_fork=False, ): + # Import profiler and setup directories to dump profiler output if memray: - follow_fork = False + from memray import Tracker, FileDestination + + if memray_dir: + import os + + os.makedirs(memray_dir, exist_ok=True) + + memray_file = f"{memray_dir}/{builders[0]}_{datetime.now().isoformat()}.bin" + else: + memray_file = ( + f"{settings.TEMP_DIR}/{builders[0]}_{datetime.now().isoformat()}.bin" + ) if num_processes > 1: follow_fork = True - memray_file = ( - f"{settings.TEMP_DIR}/{builders[0]}_{datetime.now().isoformat()}.bin" - ) - + # Click context manager handles creation and clean up of profiler dump files for memray tracker ctx.obj = ctx.with_resource( Tracker( destination=FileDestination(memray_file), @@ -134,6 +153,7 @@ def run( follow_fork=follow_fork, ) ) + # Import proper manager and worker if rabbitmq: from maggma.cli.rabbitmq import manager, worker From 279538f688bcf57dd70e80394d6518de39533fdf Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Fri, 9 Jun 2023 19:54:57 -0700 Subject: [PATCH 12/17] Fix --memray-dir help text line length for linting --- src/maggma/cli/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/maggma/cli/__init__.py b/src/maggma/cli/__init__.py index be4cb1a44..f7033bfd3 100644 --- a/src/maggma/cli/__init__.py +++ b/src/maggma/cli/__init__.py @@ -105,7 +105,9 @@ "memray_dir", default=None, type=str, - help="Directory to dump memory profiler output files. Only runs if --memray is True. Will create directory if directory does not exist, mimicking mkdir -p command. If not provided files will be dumped to system's temp directory", + help="""Directory to dump memory profiler output files. Only runs if --memray is True. 
+ Will create directory if directory does not exist, mimicking mkdir -p command. + If not provided files will be dumped to system's temp directory""", ) @click.pass_context def run( From a4474330feec37ce9c5f028196536078b8716693 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Mon, 12 Jun 2023 14:38:06 -0700 Subject: [PATCH 13/17] Remove unused import --- src/maggma/cli/settings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/maggma/cli/settings.py b/src/maggma/cli/settings.py index 1006f4f70..882f5eb31 100644 --- a/src/maggma/cli/settings.py +++ b/src/maggma/cli/settings.py @@ -1,4 +1,3 @@ -from pathlib import Path import platform import tempfile From a6e53a9c78695c76861c691b5ea8d7d2d726c6df Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Mon, 12 Jun 2023 14:39:05 -0700 Subject: [PATCH 14/17] Remove trailing whitespaces --- src/maggma/cli/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/maggma/cli/__init__.py b/src/maggma/cli/__init__.py index f7033bfd3..13dc7d9ed 100644 --- a/src/maggma/cli/__init__.py +++ b/src/maggma/cli/__init__.py @@ -105,8 +105,8 @@ "memray_dir", default=None, type=str, - help="""Directory to dump memory profiler output files. Only runs if --memray is True. - Will create directory if directory does not exist, mimicking mkdir -p command. + help="""Directory to dump memory profiler output files. Only runs if --memray is True. + Will create directory if directory does not exist, mimicking mkdir -p command. 
If not provided files will be dumped to system's temp directory""", ) @click.pass_context From cd7b6e00fd5a555753e098727f2bc93f5e6ebeb4 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Mon, 12 Jun 2023 14:41:59 -0700 Subject: [PATCH 15/17] Add tests checking mrun functionality with memray and creation of non-default output dir for memray --- tests/cli/test_init.py | 69 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/tests/cli/test_init.py b/tests/cli/test_init.py index e5ce5c03b..74756f929 100644 --- a/tests/cli/test_init.py +++ b/tests/cli/test_init.py @@ -1,3 +1,4 @@ +import os import shutil from datetime import datetime from pathlib import Path @@ -32,7 +33,6 @@ def reporting_store(): def test_basic_run(): - runner = CliRunner() result = runner.invoke(run, ["--help"]) assert result.exit_code == 0 @@ -43,7 +43,6 @@ def test_basic_run(): def test_run_builder(mongostore): - memorystore = MemoryStore("temp") builder = CopyBuilder(mongostore, memorystore) @@ -81,7 +80,6 @@ def test_run_builder(mongostore): def test_run_builder_chain(mongostore): - memorystore = MemoryStore("temp") builder1 = CopyBuilder(mongostore, memorystore) builder2 = CopyBuilder(mongostore, memorystore) @@ -120,7 +118,6 @@ def test_run_builder_chain(mongostore): def test_reporting(mongostore, reporting_store): - memorystore = MemoryStore("temp") builder = CopyBuilder(mongostore, memorystore) @@ -156,7 +153,6 @@ def test_reporting(mongostore, reporting_store): def test_python_source(): - runner = CliRunner() with runner.isolated_filesystem(): @@ -170,7 +166,6 @@ def test_python_source(): def test_python_notebook_source(): - runner = CliRunner() with runner.isolated_filesystem(): @@ -184,3 +179,65 @@ def test_python_notebook_source(): assert result.exit_code == 0 assert "Ended multiprocessing: DummyBuilder" in result.output + + +def test_memray_run_builder(mongostore): + memorystore = 
MemoryStore("temp") + builder = CopyBuilder(mongostore, memorystore) + + mongostore.update( + [ + {mongostore.key: i, mongostore.last_updated_field: datetime.utcnow()} + for i in range(10) + ] + ) + + runner = CliRunner() + with runner.isolated_filesystem(): + dumpfn(builder, "test_builder.json") + result = runner.invoke(run, ["-v", "--memray", "on", "test_builder.json"]) + assert result.exit_code == 0 + assert "CopyBuilder" in result.output + assert "SerialProcessor" in result.output + + result = runner.invoke( + run, ["-vvv", "--no_bars", "--memray", "on", "test_builder.json"] + ) + assert result.exit_code == 0 + assert "Get" not in result.output + assert "Update" not in result.output + + result = runner.invoke( + run, ["-v", "-n", "2", "--memray", "on", "test_builder.json"] + ) + assert result.exit_code == 0 + assert "CopyBuilder" in result.output + assert "MultiProcessor" in result.output + + result = runner.invoke( + run, ["-vvv", "-n", "2", "--no_bars", "--memray", "on", "test_builder.json"] + ) + assert result.exit_code == 0 + assert "Get" not in result.output + assert "Update" not in result.output + + +def test_memray_user_output_dir(mongostore): + memorystore = MemoryStore("temp") + builder = CopyBuilder(mongostore, memorystore) + + mongostore.update( + [ + {mongostore.key: i, mongostore.last_updated_field: datetime.utcnow()} + for i in range(10) + ] + ) + + runner = CliRunner() + with runner.isolated_filesystem(): + dumpfn(builder, "test_builder.json") + result = runner.invoke( + run, ["--memray", "on", "-md", "memray_output_dir/", "test_builder.json"] + ) + assert result.exit_code == 0 + assert (Path.cwd() / "memray_output_dir").exists() is True From 1e5277fc24bda6fdce1aec4405d58be4841dc8a0 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Mon, 12 Jun 2023 14:52:30 -0700 Subject: [PATCH 16/17] Add info to running builders docs for -md flag which allows user to specify output directory for memray --- 
docs/getting_started/running_builders.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/getting_started/running_builders.md b/docs/getting_started/running_builders.md index 54f07df7d..b2c5e19f3 100644 --- a/docs/getting_started/running_builders.md +++ b/docs/getting_started/running_builders.md @@ -138,4 +138,6 @@ The profiler will generate two files after the builder finishes: ***Note***: Different platforms/operating systems purge their system's `temp` directory at different intervals. It is recommended to move at least the `.bin` file to a more stable location. The `.bin` file can be used to recreate the flamegraph at anytime using the Memray CLI. +Using the flag `--memray-dir` (`-md`) allows for specifying an output directory for the `.bin` and `.html` files created by the profiler. The provided directory will be created if the directory does not exist, mimicking the `mkdir -p` command. + Further data visualization and transform examples can be found in Memray's documentation ([Memray reporters](https://bloomberg.github.io/memray/live.html)). 
\ No newline at end of file From 50cef25f69ff55343ec305d3a8cb59972a121b85 Mon Sep 17 00:00:00 2001 From: Tyler Mathis <35553152+tsmathis@users.noreply.github.com> Date: Tue, 13 Jun 2023 09:33:02 -0700 Subject: [PATCH 17/17] Remove reference to installing maggma from source in running builders doc in favor of just pip installing maggma plus memray --- docs/getting_started/running_builders.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started/running_builders.md b/docs/getting_started/running_builders.md index b2c5e19f3..0f9e75737 100644 --- a/docs/getting_started/running_builders.md +++ b/docs/getting_started/running_builders.md @@ -123,7 +123,7 @@ These event docs also contain the `builder`, a `build_id` which is unique for ea ## Profiling Memory Usage of Builders -`mrun` can optionally profile the memory usage of a running builder by using the Memray Python memory profiling tool ([Memray](https://github.com/bloomberg/memray)). To get started, `maggma` will first need to be installed from source ([Maggma installation](https://materialsproject.github.io/maggma/#installation-from-source)) followed by `pip` installing Memray using `pip install memray`, or by installing the optional `maggma` requirements by using `pip install requirements-optional.txt` in the `maggma` base directory. +`mrun` can optionally profile the memory usage of a running builder by using the Memray Python memory profiling tool ([Memray](https://github.com/bloomberg/memray)). To get started, Memray should be installed in the same environment as `maggma` using `pip install memray`. Setting the `--memray` (`-m`) option to `on`, or `True`, will signal `mrun` to profile the memory usage of any builders passed to `mrun` as the builders are running. The profiler also supports profiling of both single and forked processes. 
For example, spawning multiple processes in `mrun` with `-n` will signal the profiler to track any forked child processes spawned from the parent process.