Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Runner profiler update #1348

Merged
merged 29 commits into master from runner_profiler_update
Nov 23, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
51f0f47
profiler is added to runner
asteyo Nov 6, 2021
cb5b4ea
profiler minimal example import fix
asteyo Nov 8, 2021
de4c5f4
runner docs updated
asteyo Nov 8, 2021
aad700a
logdir added to profiler
asteyo Nov 8, 2021
0fc2564
tensorboard_path fix
asteyo Nov 8, 2021
95e5ade
runner profiler callback updated
asteyo Nov 11, 2021
a2b3f00
Merge branch 'master' into runner_profiler_update
asteyo Nov 11, 2021
51a899d
fix codestyle
asteyo Nov 11, 2021
b54db88
Merge branch 'runner_profiler_update' of https://github.com/asteyo/ca…
asteyo Nov 11, 2021
c015c1c
Merge branch 'master' into runner_profiler_update
asteyo Nov 11, 2021
5b6ed80
key_loader fix
asteyo Nov 11, 2021
5cefd03
Merge branch 'runner_profiler_update' of https://github.com/asteyo/ca…
asteyo Nov 11, 2021
d4d3b81
profiler_kwargs fix
asteyo Nov 11, 2021
64715ef
profiler updated
asteyo Nov 11, 2021
7db8df5
profiler on_experiment_start method refactoring
asteyo Nov 11, 2021
5f16fed
logs path fixed
asteyo Nov 11, 2021
bd03a44
Revert "logs path fixed"
asteyo Nov 11, 2021
45fc5b6
profiler test is added
asteyo Nov 21, 2021
1d69e8c
flake8 update
asteyo Nov 21, 2021
65e831b
Merge branch 'master' into runner_profiler_update
asteyo Nov 21, 2021
a8c4ea7
profiler availability check is added
asteyo Nov 22, 2021
64b390e
profiler reqs is added
asteyo Nov 22, 2021
506ee23
Merge branch 'runner_profiler_update' of https://github.com/asteyo/ca…
asteyo Nov 22, 2021
f6c2d67
setup docs updated
asteyo Nov 22, 2021
023d398
codestyle fix
asteyo Nov 22, 2021
dc6371a
dl_cpu.yml refactoring
asteyo Nov 22, 2021
028c407
Revert "dl_cpu.yml refactoring"
asteyo Nov 22, 2021
9e0fe35
profiler added to dl_cpu.yml
asteyo Nov 22, 2021
fca5fef
Merge branch 'master' into runner_profiler_update
asteyo Nov 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions catalyst/callbacks/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ class ProfilerCallback(Callback):
from catalyst import dl
from catalyst.data import ToTensor
from catalyst.contrib.datasets import MNIST
from catalyst.contrib.nn.modules import Flatten

loaders = {
"train": DataLoader(
Expand All @@ -64,7 +63,7 @@ class ProfilerCallback(Callback):
),
}

model = nn.Sequential(Flatten(), nn.Linear(784, 512), nn.ReLU(), nn.Linear(512, 10))
model = nn.Sequential(nn.Flatten(), nn.Linear(784, 512), nn.ReLU(), nn.Linear(512, 10))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
runner = dl.SupervisedRunner()
Expand Down
22 changes: 22 additions & 0 deletions catalyst/runners/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from catalyst.callbacks.criterion import CriterionCallback, ICriterionCallback
from catalyst.callbacks.misc import CheckRunCallback, TimerCallback, TqdmCallback
from catalyst.callbacks.optimizer import IOptimizerCallback, OptimizerCallback
from catalyst.callbacks.profiler import ProfilerCallback
from catalyst.callbacks.scheduler import ISchedulerCallback, SchedulerCallback
from catalyst.core.callback import Callback
from catalyst.core.logger import ILogger
Expand Down Expand Up @@ -191,6 +192,7 @@ def __init__(self, *args, **kwargs):
self._timeit = False
self._check = False
self._overfit = False
self._profile = False
self._load_best_on_end = False

@property
Expand Down Expand Up @@ -303,6 +305,22 @@ def get_callbacks(self, stage: str) -> "OrderedDict[str, Callback]":
callbacks["_check"] = CheckRunCallback()
if self._overfit and not is_callback_exists(BatchOverfitCallback):
callbacks["_overfit"] = BatchOverfitCallback()
if self._profile and not is_callback_exists(ProfilerCallback):
callbacks["_profile"] = ProfilerCallback(
tensorboard_path=os.path.join(self._logdir, "tb_profile"),
loader_key="train",
[Review comment — Member]
Suggested change:
    loader_key="train",
    epoch=1,

[Reply — Contributor Author]
epoch=1 is already the default in the profiler's init, so it does not need to be passed here.
profiler_kwargs=dict(
activities=[
torch.profiler.ProfilerActivity.CPU,
torch.profiler.ProfilerActivity.CUDA,
],
on_trace_ready=torch.profiler.tensorboard_trace_handler(
os.path.join(self._logdir, "tb_profile")
),
with_stack=True,
with_flops=True,
),
)

if self._logdir is not None and not is_callback_exists(ICheckpointCallback):
callbacks["_checkpoint"] = CheckpointCallback(
Expand Down Expand Up @@ -344,6 +362,7 @@ def train(
timeit: bool = False,
check: bool = False,
overfit: bool = False,
profile: bool = False,
load_best_on_end: bool = False,
# engine extra params,
fp16: bool = False,
Expand Down Expand Up @@ -385,6 +404,8 @@ def train(
overfit: if True, then takes only one batch per loader
for model overfitting, for advance usage please check
``BatchOverfitCallback``
profile: if True, then uses ProfilerCallback, for advance usage please check
``ProfilerCallback``
load_best_on_end: if True, Runner will load
best checkpoint state (model, optimizer, etc)
according to validation metrics. Requires specified ``logdir``.
Expand Down Expand Up @@ -510,6 +531,7 @@ def on_loader_end(self, runner):
self._timeit = timeit
self._check = check
self._overfit = overfit
self._profile = profile
self._load_best_on_end = load_best_on_end
# run
self.run()
Expand Down