From eaed07c85cc0f7b14a71f9dfc74d76408135fc84 Mon Sep 17 00:00:00 2001 From: Zhongqiang Huang Date: Thu, 15 Aug 2024 10:57:27 -0700 Subject: [PATCH 1/5] Update default config to ultravox_v0.3 --- mcloud.yaml | 2 +- .../training/configs/llama3_whisper_kd.yaml | 22 ++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/mcloud.yaml b/mcloud.yaml index 280e258f..e2fe999c 100644 --- a/mcloud.yaml +++ b/mcloud.yaml @@ -13,5 +13,5 @@ command: >- cd ultravox && poetry install --no-dev && poetry run torchrun --nproc_per_node=8 -m ultravox.training.train $TRAIN_ARGS env_variables: MLFLOW_TRACKING_URI: databricks - UV_BRANCH: main + UV_BRANCH: update_default_config_to_ultravox_v0.3 TRAIN_ARGS: --config_path ultravox/training/configs/llama3_whisper_kd.yaml diff --git a/ultravox/training/configs/llama3_whisper_kd.yaml b/ultravox/training/configs/llama3_whisper_kd.yaml index d951f02d..63c9ce95 100644 --- a/ultravox/training/configs/llama3_whisper_kd.yaml +++ b/ultravox/training/configs/llama3_whisper_kd.yaml @@ -13,27 +13,33 @@ loss_config: # Temporarily remove heysquad_human from val_sets as it causes the training to fail. 
val_sets: ["anyinstruct", "soda", "peoplespeech"] -batch_size: 4 -max_steps: 1000 +batch_size: 24 +max_steps: 7200 # x8x24 = 1,382,400 samples data_sets: [] data_dicts: - path: "fixie-ai/librispeech_asr" name: "clean" splits: - - "train.100" - - "train.360" + - "train.100" # 28_539 samples + - "train.360" # 104_014 samples user_template: "Continue the following text using less than 50 words:\n\n<|audio|>" assistant_template: "{{ continuation }}" transcript_template: "{{ text }}" - weight: 2 - num_samples: 100_000 + weight: 1 - path: "fixie-ai/librispeech_asr" name: "other" splits: - - "train.500" + - "train.500" # 148_688 samples user_template: "Continue the following text using less than 50 words:\n\n<|audio|>" assistant_template: "{{ continuation }}" transcript_template: "{{ text }}" weight: 1 - num_samples: 100_000 + - path: "fixie-ai/common_voice_17_0" + name: "en" + splits: + - "train" # 1_101_170 samples + user_template: "Continue the following text using less than 50 words:\n\n<|audio|>" + assistant_template: "{{ continuation }}" + transcript_template: "{{ text_proc.format_asr_text(sentence) }}" + weight: 8 \ No newline at end of file From bee674de4b7fcf856e3d6f9154468f520c8fefef Mon Sep 17 00:00:00 2001 From: Zhongqiang Huang Date: Thu, 15 Aug 2024 11:33:05 -0700 Subject: [PATCH 2/5] Update --- ultravox/training/configs/meta_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ultravox/training/configs/meta_config.yaml b/ultravox/training/configs/meta_config.yaml index 62f622d3..9ccb4348 100644 --- a/ultravox/training/configs/meta_config.yaml +++ b/ultravox/training/configs/meta_config.yaml @@ -3,7 +3,7 @@ audio_model: "facebook/wav2vec2-base-960h" data_sets: ["gigaspeech"] val_sets: ["heysquad_human", "anyinstruct", "soda", "peoplespeech"] -stop_strategy: "last_exhausted" +stop_strategy: "LAST_EXHAUSTED" train_on_inputs: False shuffle_data: True From 7859ac035b932e137208ed9cb9d855b05edcac91 Mon Sep 17 00:00:00 2001 From: Zhongqiang 
Huang Date: Thu, 15 Aug 2024 13:11:22 -0700 Subject: [PATCH 3/5] Add release_config.yaml and use it as the default --- mcloud.yaml | 2 +- .../training/configs/llama3_whisper_kd.yaml | 22 ++++----- ultravox/training/configs/release_config.yaml | 45 +++++++++++++++++++ 3 files changed, 54 insertions(+), 15 deletions(-) create mode 100644 ultravox/training/configs/release_config.yaml diff --git a/mcloud.yaml b/mcloud.yaml index e2fe999c..f44593de 100644 --- a/mcloud.yaml +++ b/mcloud.yaml @@ -14,4 +14,4 @@ command: >- env_variables: MLFLOW_TRACKING_URI: databricks UV_BRANCH: update_default_config_to_ultravox_v0.3 - TRAIN_ARGS: --config_path ultravox/training/configs/llama3_whisper_kd.yaml + TRAIN_ARGS: --config_path ultravox/training/configs/release_config.yaml \ No newline at end of file diff --git a/ultravox/training/configs/llama3_whisper_kd.yaml b/ultravox/training/configs/llama3_whisper_kd.yaml index 63c9ce95..d951f02d 100644 --- a/ultravox/training/configs/llama3_whisper_kd.yaml +++ b/ultravox/training/configs/llama3_whisper_kd.yaml @@ -13,33 +13,27 @@ loss_config: # Temporarily remove heysquad_human from val_sets as it causes the training to fail. 
val_sets: ["anyinstruct", "soda", "peoplespeech"] -batch_size: 24 -max_steps: 7200 # x8x24 = 1,382,400 samples +batch_size: 4 +max_steps: 1000 data_sets: [] data_dicts: - path: "fixie-ai/librispeech_asr" name: "clean" splits: - - "train.100" # 28_539 samples - - "train.360" # 104_014 samples + - "train.100" + - "train.360" user_template: "Continue the following text using less than 50 words:\n\n<|audio|>" assistant_template: "{{ continuation }}" transcript_template: "{{ text }}" - weight: 1 + weight: 2 + num_samples: 100_000 - path: "fixie-ai/librispeech_asr" name: "other" splits: - - "train.500" # 148_688 samples + - "train.500" user_template: "Continue the following text using less than 50 words:\n\n<|audio|>" assistant_template: "{{ continuation }}" transcript_template: "{{ text }}" weight: 1 - - path: "fixie-ai/common_voice_17_0" - name: "en" - splits: - - "train" # 1_101_170 samples - user_template: "Continue the following text using less than 50 words:\n\n<|audio|>" - assistant_template: "{{ continuation }}" - transcript_template: "{{ text_proc.format_asr_text(sentence) }}" - weight: 8 \ No newline at end of file + num_samples: 100_000 diff --git a/ultravox/training/configs/release_config.yaml b/ultravox/training/configs/release_config.yaml new file mode 100644 index 00000000..2f662f07 --- /dev/null +++ b/ultravox/training/configs/release_config.yaml @@ -0,0 +1,45 @@ +# SLM with ultravox & llama3, trained wtih knowledge distillation. +exp_name: "ultravox-v0_3" + +# Make sure to accept the license agreement on huggingface hub +text_model: "meta-llama/Meta-Llama-3-8B-Instruct" +audio_model: "openai/whisper-small" + + +loss_config: + # Choose from ["KL_Divergence", "CrossEntropy"], default is "KL_Divergence" + loss_function: "KL_Divergence" + +# Temporarily remove heysquad_human from val_sets as it causes the training to fail. 
+val_sets: ["anyinstruct", "soda", "peoplespeech"] + +batch_size: 24 +max_steps: 7200 # x8x24 = 1,382,400 samples + +data_sets: [] +data_dicts: + - path: "fixie-ai/librispeech_asr" + name: "clean" + splits: + - "train.100" # 28_539 samples + - "train.360" # 104_014 samples + user_template: "Continue the following text using less than 50 words:\n\n<|audio|>" + assistant_template: "{{ continuation }}" + transcript_template: "{{ text }}" + weight: 1 + - path: "fixie-ai/librispeech_asr" + name: "other" + splits: + - "train.500" # 148_688 samples + user_template: "Continue the following text using less than 50 words:\n\n<|audio|>" + assistant_template: "{{ continuation }}" + transcript_template: "{{ text }}" + weight: 1 + - path: "fixie-ai/common_voice_17_0" + name: "en" + splits: + - "train" # 1_101_170 samples + user_template: "Continue the following text using less than 50 words:\n\n<|audio|>" + assistant_template: "{{ continuation }}" + transcript_template: "{{ text_proc.format_asr_text(sentence) }}" + weight: 8 \ No newline at end of file From 206a79c3324b60444fc7ec7878b590a3759c068b Mon Sep 17 00:00:00 2001 From: Zhongqiang Huang Date: Thu, 15 Aug 2024 13:12:27 -0700 Subject: [PATCH 4/5] Update --- ultravox/training/configs/release_config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ultravox/training/configs/release_config.yaml b/ultravox/training/configs/release_config.yaml index 2f662f07..02e4df5c 100644 --- a/ultravox/training/configs/release_config.yaml +++ b/ultravox/training/configs/release_config.yaml @@ -1,8 +1,8 @@ -# SLM with ultravox & llama3, trained wtih knowledge distillation. +# SLM with ultravox & llama3.1, trained with knowledge distillation. 
exp_name: "ultravox-v0_3" # Make sure to accept the license agreement on huggingface hub -text_model: "meta-llama/Meta-Llama-3-8B-Instruct" +text_model: "meta-llama/Meta-Llama-3.1-8B-Instruct" audio_model: "openai/whisper-small" From f3441d62f0b6614b68d04e8f8675e7244f02bff6 Mon Sep 17 00:00:00 2001 From: Zhongqiang Huang Date: Thu, 15 Aug 2024 17:44:54 -0700 Subject: [PATCH 5/5] Update --- mcloud.yaml | 2 +- ultravox/training/configs/release_config.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mcloud.yaml b/mcloud.yaml index f44593de..99788954 100644 --- a/mcloud.yaml +++ b/mcloud.yaml @@ -13,5 +13,5 @@ command: >- cd ultravox && poetry install --no-dev && poetry run torchrun --nproc_per_node=8 -m ultravox.training.train $TRAIN_ARGS env_variables: MLFLOW_TRACKING_URI: databricks - UV_BRANCH: update_default_config_to_ultravox_v0.3 + UV_BRANCH: main TRAIN_ARGS: --config_path ultravox/training/configs/release_config.yaml \ No newline at end of file diff --git a/ultravox/training/configs/release_config.yaml b/ultravox/training/configs/release_config.yaml index 02e4df5c..973656a7 100644 --- a/ultravox/training/configs/release_config.yaml +++ b/ultravox/training/configs/release_config.yaml @@ -42,4 +42,4 @@ data_dicts: user_template: "Continue the following text using less than 50 words:\n\n<|audio|>" assistant_template: "{{ continuation }}" transcript_template: "{{ text_proc.format_asr_text(sentence) }}" - weight: 8 \ No newline at end of file + weight: 8