Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a temp path to save local checkpoints for remote save path #3673

Merged
Merged 5 commits on Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions composer/callbacks/checkpoint_saver.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,10 +294,15 @@ def __init__(
num_concurrent_uploads: int = 1,
upload_timeout_in_seconds: int = 3600,
):

backend, _, local_folder = parse_uri(str(folder))
if local_folder == '':
local_folder = '.'

is_remote_folder = backend != ''
irenedea marked this conversation as resolved.
Show resolved Hide resolved
if is_remote_folder: # If we are uploading to a remote path, we use a temporary directory to save local checkpoints.
irenedea marked this conversation as resolved.
Show resolved Hide resolved
local_folder = os.path.join(tempfile.mkdtemp(), local_folder)

filename = str(filename)
remote_file_name = str(remote_file_name) if remote_file_name is not None else None
latest_filename = str(latest_filename) if latest_filename is not None else None
Expand Down
3 changes: 2 additions & 1 deletion tests/trainer/test_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,8 @@ def _get_tmp_dir(self):

if delete_local:
# delete files locally, forcing trainer to look in object store
shutil.rmtree('first')
assert trainer_1._checkpoint_saver is not None
shutil.rmtree(trainer_1._checkpoint_saver.folder)

trainer_2 = self.get_trainer(
latest_filename=latest_filename,
Expand Down
Loading