From 79630dd561d19e95fb037978c0c652b5ccefc18a Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Mon, 11 Nov 2024 17:45:35 -0800 Subject: [PATCH 1/5] feat: TRTLLM API handle tokenizers without pad_id (e.g., tiktoken) Signed-off-by: Terry Kong --- nemo_aligner/utils/trt_llm.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/nemo_aligner/utils/trt_llm.py b/nemo_aligner/utils/trt_llm.py index 1f879064d..70e85daea 100644 --- a/nemo_aligner/utils/trt_llm.py +++ b/nemo_aligner/utils/trt_llm.py @@ -72,12 +72,6 @@ def __init__( "You are trying to use NeMo-Aligner's TensorRT-LLM acceleration for LLM generation. Please build the dockerfile to enable this feature: https://github.com/NVIDIA/NeMo-Aligner/blob/main/Dockerfile" ) - # If this assert turns out to be a blocker with some tokenizers, potential workarounds could be to: - # - add a config option to allow specifying which token we pass as `end_id` to TRT-LLM (should - # be a token that the model is guaranteed to never generate) - assert ( - tokenizer.pad_id != tokenizer.eos_id - ), f"We require tokenizers to have a different {tokenizer.pad_id=} than {tokenizer.eos_id=} when using TRT-LLM. This is to make sure all code goes into the same path and include the eos_id when the response lengths are computed" assert max_input_len > 0 assert max_generation_length > 0 assert ( @@ -104,7 +98,21 @@ def __init__( rng_generator.manual_seed(seed) self.rng_generator = rng_generator - self.pad_id = tokenizer.pad_id if tokenizer.pad_id is not None else GPTGenerateTRTLLM.DEFAULT_PAD_ID + if hasattr(tokenizer, 'pad_id'): + # If this assert turns out to be a blocker with some tokenizers, potential workarounds could be to: + # - add a config option to allow specifying which token we pass as `end_id` to TRT-LLM (should + # be a token that the model is guaranteed to never generate) + assert ( + tokenizer.pad_id != tokenizer.eos_id + ), ( + f"We require tokenizers to have a different {tokenizer.pad_id=} than {tokenizer.eos_id=} " + "when using TRT-LLM. This is to make sure all code goes into the same path and include the " + "eos_id when the response lengths are computed" + ) + self.pad_id = getattr(tokenizer, 'pad_id', GPTGenerateTRTLLM.DEFAULT_PAD_ID) + else: + # Tiktoken tokenizers doesn't have pad_id + self.pad_id = GPTGenerateTRTLLM.DEFAULT_PAD_ID self.eos_id = tokenizer.eos_id end_strings = list(end_strings) From 72ad72b9802d2591dd80f40f02fdd474e5ef2c51 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Tue, 12 Nov 2024 03:45:14 +0000 Subject: [PATCH 2/5] ci: adds cherry-pick to enable GPT TRTLLM version with bias=False Signed-off-by: Terry Kong --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 44a9f8651..0b99d8d69 100644 --- a/Dockerfile +++ b/Dockerfile @@ -130,10 +130,12 @@ git fetch -a # 60e677423667c029dd05875da72bf0719774f844: [feat] Update get_model_parallel_src_rank to support tp-pp-dp ordering NeMo#10652 # 0deaf6716cb4f20766c995ce25d129795f1ae200: fix[export]: update API for disabling device reassignment in TRTLLM for Aligner NeMo#10863 # (superceded by 10863) 148543d6e9c66ff1f8562e84484448202249811d: feat: Migrate GPTSession refit path in Nemo export to ModelRunner for Aligner NeMo#10654 +# 80ff72cfedf77f68272f35773579d9d71e5ed10c: fix(export): GPT models w/ bias=False convert properly NeMo#11255 for pr_and_commit in \ "10651 0c92fe17df4642ffc33d5d8c0c83fda729e3910c" \ "10652 60e677423667c029dd05875da72bf0719774f844" \ "10863 0deaf6716cb4f20766c995ce25d129795f1ae200" \ + "11255 80ff72cfedf77f68272f35773579d9d71e5ed10c" \ ; do pr=$(cut -f1 -d' ' <<<"$pr_and_commit") head_pr_commit=$(cut -f2 -d' ' <<<"$pr_and_commit") From 3dd6eb57fc4d54dda6e0882469785161422a89b3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Nov 2024 03:58:19 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: NeMo-Aligner CI --- nemo_aligner/utils/trt_llm.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/nemo_aligner/utils/trt_llm.py b/nemo_aligner/utils/trt_llm.py index 70e85daea..30facee3d 100644 --- a/nemo_aligner/utils/trt_llm.py +++ b/nemo_aligner/utils/trt_llm.py @@ -98,18 +98,16 @@ def __init__( rng_generator.manual_seed(seed) self.rng_generator = rng_generator - if hasattr(tokenizer, 'pad_id'): + if hasattr(tokenizer, "pad_id"): # If this assert turns out to be a blocker with some tokenizers, potential workarounds could be to: # - add a config option to allow specifying which token we pass as `end_id` to TRT-LLM (should # be a token that the model is guaranteed to never generate) - assert ( - tokenizer.pad_id != tokenizer.eos_id - ), ( + assert tokenizer.pad_id != tokenizer.eos_id, ( f"We require tokenizers to have a different {tokenizer.pad_id=} than {tokenizer.eos_id=} " "when using TRT-LLM. This is to make sure all code goes into the same path and include the " "eos_id when the response lengths are computed" ) - self.pad_id = getattr(tokenizer, 'pad_id', GPTGenerateTRTLLM.DEFAULT_PAD_ID) + self.pad_id = getattr(tokenizer, "pad_id", GPTGenerateTRTLLM.DEFAULT_PAD_ID) else: # Tiktoken tokenizers doesn't have pad_id self.pad_id = GPTGenerateTRTLLM.DEFAULT_PAD_ID From 83f1c0f1e37eee75cad610d0f7306f10c8292fd3 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Wed, 13 Nov 2024 23:26:12 +0000 Subject: [PATCH 4/5] go with pad_id=-42 all the time Signed-off-by: Terry Kong --- nemo_aligner/utils/trt_llm.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/nemo_aligner/utils/trt_llm.py b/nemo_aligner/utils/trt_llm.py index 30facee3d..9f1a9345f 100644 --- a/nemo_aligner/utils/trt_llm.py +++ b/nemo_aligner/utils/trt_llm.py @@ -44,8 +44,9 @@ def append_and_repad_list(list_of_items, item_to_append, pad_id): class GPTGenerateTRTLLM: - # If a tokenizer does not have a pad_id, we use a large negative number and replace - # with self.eos_id after generation. + # Use a reserved negative number since there is variation between tokenizers if + # they (1) have a pad_id (2) don't have a pad_id or (3) have None as the pad_id. + # This pad_id is replaced with eos_id after generation. DEFAULT_PAD_ID = -42 def __init__( @@ -98,19 +99,7 @@ def __init__( rng_generator.manual_seed(seed) self.rng_generator = rng_generator - if hasattr(tokenizer, "pad_id"): - # If this assert turns out to be a blocker with some tokenizers, potential workarounds could be to: - # - add a config option to allow specifying which token we pass as `end_id` to TRT-LLM (should - # be a token that the model is guaranteed to never generate) - assert tokenizer.pad_id != tokenizer.eos_id, ( - f"We require tokenizers to have a different {tokenizer.pad_id=} than {tokenizer.eos_id=} " - "when using TRT-LLM. This is to make sure all code goes into the same path and include the " - "eos_id when the response lengths are computed" - ) - self.pad_id = getattr(tokenizer, "pad_id", GPTGenerateTRTLLM.DEFAULT_PAD_ID) - else: - # Tiktoken tokenizers doesn't have pad_id - self.pad_id = GPTGenerateTRTLLM.DEFAULT_PAD_ID + self.pad_id = GPTGenerateTRTLLM.DEFAULT_PAD_ID self.eos_id = tokenizer.eos_id end_strings = list(end_strings) From bd9fa0b36be41f99fe87afb468126eba4cd7c49e Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Fri, 15 Nov 2024 15:54:21 -0800 Subject: [PATCH 5/5] revert the dockerfile change Signed-off-by: Terry Kong --- Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0b99d8d69..44a9f8651 100644 --- a/Dockerfile +++ b/Dockerfile @@ -130,12 +130,10 @@ git fetch -a # 60e677423667c029dd05875da72bf0719774f844: [feat] Update get_model_parallel_src_rank to support tp-pp-dp ordering NeMo#10652 # 0deaf6716cb4f20766c995ce25d129795f1ae200: fix[export]: update API for disabling device reassignment in TRTLLM for Aligner NeMo#10863 # (superceded by 10863) 148543d6e9c66ff1f8562e84484448202249811d: feat: Migrate GPTSession refit path in Nemo export to ModelRunner for Aligner NeMo#10654 -# 80ff72cfedf77f68272f35773579d9d71e5ed10c: fix(export): GPT models w/ bias=False convert properly NeMo#11255 for pr_and_commit in \ "10651 0c92fe17df4642ffc33d5d8c0c83fda729e3910c" \ "10652 60e677423667c029dd05875da72bf0719774f844" \ "10863 0deaf6716cb4f20766c995ce25d129795f1ae200" \ - "11255 80ff72cfedf77f68272f35773579d9d71e5ed10c" \ ; do pr=$(cut -f1 -d' ' <<<"$pr_and_commit") head_pr_commit=$(cut -f2 -d' ' <<<"$pr_and_commit")