Skip to content

Commit

Permalink
Change the bucket for different object (deepjavalibrary#691)
Browse files (browse the repository at this point in the history)
  • Loading branch information
sindhuvahinis authored May 5, 2023
1 parent 72fcac8 commit 98897f9
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 8 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/llm_integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ jobs:
train
# checking if checkpoint files are generated.
- sudo /opt/djl/bin/s5cmd --retry-count 1 sync s3://djl-llm/t5-small-tp4/* $PWD/models/partition-test
+ sudo /opt/djl/bin/s5cmd --retry-count 1 sync s3://djl-llm/t5-small-tp4/ft-aot/* $PWD/models/partition-test
if grep -q t5-small-fp32-4-1 $PWD/models/partition-test/*-gpu/verify ; then echo "checkpoint files generated"; else exit 1; fi
- name: Test t5-small-inference
working-directory: tests/integration
Expand Down Expand Up @@ -590,7 +590,7 @@ jobs:
train
# checking if checkpoint files are generated.
- /opt/djl/bin/s5cmd --retry-count 1 sync s3://djl-llm/t5-small-tp4/* $PWD/models/partition-test
+ /opt/djl/bin/s5cmd --retry-count 1 sync s3://djl-llm/t5-small-tp4/ft-aot-handler/* $PWD/models/partition-test
if grep -q t5-small-fp32-4-1 $PWD/models/partition-test/*-gpu/verify ; then echo "checkpoint files generated"; else exit 1; fi
- name: Test t5-small-inference
working-directory: tests/integration
Expand Down Expand Up @@ -727,7 +727,7 @@ jobs:
# checking if pt files are generated.
mkdir $PWD/models/partition-test
- /opt/djl/bin/s5cmd --retry-count 1 sync s3://djl-llm/bloom-7b1-tp4/* $PWD/models/partition-test
+ /opt/djl/bin/s5cmd --retry-count 1 sync s3://djl-llm/bloom-7b1-tp4/ds-aot/* $PWD/models/partition-test
if ls $PWD/models/partition-test/*.pt &>/dev/null ; then echo "checkpoint files generated"; else exit 1; fi
if ls $PWD/models/partition-test/ds_inference_config.json &>/dev/null ; \
then echo "ds_inference_config.json generated"; else exit 1; fi
Expand Down Expand Up @@ -818,7 +818,7 @@ jobs:
# checking if pt files are generated.
# downloads the uploaded partitioned checkpoints from s3url.
- /opt/djl/bin/s5cmd --retry-count 1 sync s3://djl-llm/bloom-7b1-tp4/* $PWD/models/partition-test
+ /opt/djl/bin/s5cmd --retry-count 1 sync s3://djl-llm/bloom-7b1-tp4/ds-aot-handler/* $PWD/models/partition-test
if ls $PWD/models/partition-test/*.pt &>/dev/null ; then echo "checkpoint files generated"; else exit 1; fi
if ls $PWD/models/partition-test/ds_inference_config.json &>/dev/null ; \
then echo "ds_inference_config.json generated"; else exit 1; fi
Expand Down
8 changes: 4 additions & 4 deletions tests/integration/llm/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"option.tensor_parallel_degree": 4,
"option.task": "text-generation",
"option.dtype": "float16",
- "option.save_mp_checkpoint_path": "s3://djl-llm/bloom-7b1-tp4"
+ "option.save_mp_checkpoint_path": "s3://djl-llm/bloom-7b1-tp4/ds-aot/"
}
}

Expand All @@ -50,7 +50,7 @@
"option.tensor_parallel_degree": 4,
"option.task": "text-generation",
"option.dtype": "fp16",
- "option.save_mp_checkpoint_path": "s3://djl-llm/bloom-7b1-tp4"
+ "option.save_mp_checkpoint_path": "s3://djl-llm/bloom-7b1-tp4/ds-aot-handler/"
}
}

Expand Down Expand Up @@ -388,7 +388,7 @@ def build_ft_raw_aot_model(model):
options = ft_model_list[model]
options["engine"] = "FasterTransformer"
if model == 't5-small':
- options["option.save_mp_checkpoint_path"] = "s3://djl-llm/t5-small-tp4"
+ options["option.save_mp_checkpoint_path"] = "s3://djl-llm/t5-small-tp4/ft-aot/"
else:
options["option.save_mp_checkpoint_path"] = "/opt/ml/input/data/training/partition-test"
write_properties(options)
Expand All @@ -404,7 +404,7 @@ def builder_ft_handler_aot_model(model):
options["engine"] = "FasterTransformer"
# options["entryPoint"] = "djl_python.fastertransformer"
if model == 't5-small':
- options["option.save_mp_checkpoint_path"] = "s3://djl-llm/t5-small-tp4"
+ options["option.save_mp_checkpoint_path"] = "s3://djl-llm/t5-small-tp4/ft-aot-handler/"
else:
options["option.save_mp_checkpoint_path"] = "/opt/ml/input/data/training/partition-test"
write_properties(options)
Expand Down

0 comments on commit 98897f9

Please sign in to comment.