Degert/fix dpo capitalisation (#34)
trias702 authored Dec 2, 2023
1 parent 1cf5657 commit 8ca2e04
Showing 1 changed file with 1 addition and 1 deletion.
nemo_aligner/models/nlp/gpt/megatron_gpt_dpo_model.py (2 changes: 1 addition & 1 deletion)
@@ -365,7 +365,7 @@ def get_ref_policy_logprobs(self, list_of_batches):
[torch.cat((b["chosen_labels"], b["rejected_labels"]), dim=0) for b in list_of_batches], dim=0
)
global_batch = [tokens, masks, pos_ids, labels]
-        with cpu_weight_swap(self, self.ref_policy_state_dict, megatron_amp_O2=self.megatron_amp_o2):
+        with cpu_weight_swap(self, self.ref_policy_state_dict, megatron_amp_O2=self.megatron_amp_O2):
ref_log_probs = self.get_logprob_batch(global_batch)

# return in GPU, trainer needs to move to cpu
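The one-character change corrects the attribute read on the right-hand side of the keyword argument: the pre-fix code passed self.megatron_amp_o2 (lowercase), while the attribute on the model is spelled megatron_amp_O2, matching the keyword the cpu_weight_swap call already used. As a minimal illustration of why such a case mismatch fails at runtime (FakeDPOModel below is a hypothetical stand-in for the model class, not NeMo-Aligner code):

# Python attribute names are case sensitive, so reading the lowercase
# spelling on an object that only defines the uppercase one raises
# AttributeError. Sketch only; the class and flag value are made up.

class FakeDPOModel:
    """Hypothetical stand-in; only the O2 flag matters for this example."""

    def __init__(self):
        # Attribute defined with uppercase "O2", as in the fixed line.
        self.megatron_amp_O2 = True


model = FakeDPOModel()

print(model.megatron_amp_O2)      # post-fix spelling: prints True

try:
    print(model.megatron_amp_o2)  # pre-fix spelling
except AttributeError as err:
    print(f"AttributeError: {err}")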
