diff --git a/tests/functional/dpo.sh b/tests/functional/dpo.sh old mode 100644 new mode 100755 diff --git a/tests/functional/ppo.sh b/tests/functional/ppo.sh old mode 100644 new mode 100755 index a4d703fd3..46f4974b5 --- a/tests/functional/ppo.sh +++ b/tests/functional/ppo.sh @@ -1,5 +1,19 @@ #!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd $SCRIPT_DIR set -eoux pipefail diff --git a/tests/functional/rm.sh b/tests/functional/rm.sh old mode 100644 new mode 100755 index 0a8c41584..0c32c3757 --- a/tests/functional/rm.sh +++ b/tests/functional/rm.sh @@ -1,5 +1,19 @@ #!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd $SCRIPT_DIR set -eoux pipefail @@ -38,7 +52,7 @@ rm_training() { export CUDA_VISIBLE_DEVICES=0,1 export PYTHONPATH="${GPFS}:${PYTHONPATH:-}" export HYDRA_FULL_ERROR=1 -torchrun --nproc-per-node 2 ${GPFS}/examples/nlp/gpt/train_reward_model.py \ +torchrun --nproc_per_node=2 ${GPFS}/examples/nlp/gpt/train_reward_model.py \ --config-path=${CONF_DIR} \ --config-name=${CONF_NAME} \ trainer.num_nodes=1 \ @@ -75,4 +89,5 @@ torchrun --nproc-per-node 2 ${GPFS}/examples/nlp/gpt/train_reward_model.py \ } log_file=$(mktemp /tmp/rm-log-XXXXXX) -rm_training | tee $log_file \ No newline at end of file +rm_training | tee $log_file +echo "[Finished] $0" diff --git a/tests/functional/sft.sh b/tests/functional/sft.sh old mode 100644 new mode 100755 index 8f53aa889..5061edf23 --- a/tests/functional/sft.sh +++ b/tests/functional/sft.sh @@ -1,5 +1,19 @@ #!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd $SCRIPT_DIR set -eoux pipefail @@ -33,7 +47,7 @@ sft() { export CUDA_VISIBLE_DEVICES=0,1 export PYTHONPATH="${GPFS}:${PYTHONPATH:-}" export HYDRA_FULL_ERROR=1 -torchrun --nproc-per-node 2 ${GPFS}/examples/nlp/gpt/train_gpt_sft.py \ +torchrun --nproc_per_node=2 ${GPFS}/examples/nlp/gpt/train_gpt_sft.py \ --config-path=${CONF_DIR} \ --config-name=${CONF_NAME} \ trainer.num_nodes=1 \ @@ -76,4 +90,5 @@ torchrun --nproc-per-node 2 ${GPFS}/examples/nlp/gpt/train_gpt_sft.py \ } log_file=$(mktemp /tmp/sft-log-XXXXXX) -sft | tee $log_file \ No newline at end of file +sft | tee $log_file +echo "[Finished] $0" diff --git a/tests/functional/test_cases/ppo-llama3-pp2-reshard b/tests/functional/test_cases/ppo-llama3-pp2-reshard index 9169b10da..880e3ebce 100755 --- a/tests/functional/test_cases/ppo-llama3-pp2-reshard +++ b/tests/functional/test_cases/ppo-llama3-pp2-reshard @@ -25,4 +25,5 @@ GBS=2 \ RESHARD=True \ RM_NEMO_FILE=${ALIGNER_CI_DIR}/checkpoints/llama3--nlayers4-hidden64-ffn224-dummy_rm-megatron_gpt.nemo \ ACTOR_NEMO_FILE=${ALIGNER_CI_DIR}/checkpoints/tiny-llama3-results-nlayers2-hidden128-ffn448-nhead4-qgroup2-megatron_gpt.nemo \ - bash ../ppo.sh + bash ../ppo.sh \ + 2>&1 | tee $(basename $0).log diff --git a/tests/functional/test_cases/rm-llama3 b/tests/functional/test_cases/rm-llama3 old mode 100644 new mode 100755 index 05caba634..830b2b111 --- a/tests/functional/test_cases/rm-llama3 +++ b/tests/functional/test_cases/rm-llama3 @@ -1,8 +1,24 @@ #!/bin/bash + +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd $SCRIPT_DIR set -eoux pipefail PRETRAINED_CHECKPOINT_NEMO_FILE=${ALIGNER_CI_DIR}/checkpoints/tiny-llama3-results-nlayers2-hidden128-ffn448-nhead4-qgroup2-megatron_gpt.nemo \ -bash ../rm.sh \ No newline at end of file +bash ../rm.sh \ + 2>&1 | tee $(basename $0).log \ No newline at end of file diff --git a/tests/functional/test_cases/sft-llama3 b/tests/functional/test_cases/sft-llama3 old mode 100644 new mode 100755 index ef6694306..50a605f38 --- a/tests/functional/test_cases/sft-llama3 +++ b/tests/functional/test_cases/sft-llama3 @@ -1,8 +1,24 @@ #!/bin/bash + +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd $SCRIPT_DIR set -eoux pipefail PRETRAINED_CHECKPOINT_NEMO_FILE=${ALIGNER_CI_DIR}/checkpoints/tiny-llama3-results-nlayers2-hidden128-ffn448-nhead4-qgroup2-megatron_gpt.nemo \ -bash ../sft.sh \ No newline at end of file + bash ../sft.sh \ + 2>&1 | tee $(basename $0).log \ No newline at end of file