PaddlePaddle · wj-Mcat · Oct 31, 2023 · Oct 30, 2023 · Oct 31, 2023 · wj-Mcat
diff --git a/paddlenlp/generation/logits_process.py b/paddlenlp/generation/logits_process.py
@@ -306,10 +306,16 @@ def TopKProcess(probs: paddle.Tensor, top_k: int, min_tokens_to_keep: int):
 
 
 def TopPProcess(probs: paddle.Tensor, top_p: float, min_tokens_to_keep: int):
-    sorted_indices = paddle.argsort(probs, descending=True)
-    if isinstance(sorted_indices, tuple):
-        sorted_probs, sorted_indices = sorted_indices
+    if probs.dtype == paddle.bfloat16:
+        probs = paddle.cast(probs, paddle.float32)
+
+        sorted_indices = paddle.argsort(probs, descending=True)
+        sorted_probs = paddle.sort(probs, descending=True)
+
+        sorted_probs  = paddle.cast(sorted_probs, paddle.bfloat16)
+        sorted_indices = paddle.cast(sorted_indices, paddle.int64)
     else:
+        sorted_indices = paddle.argsort(probs, descending=True)
         sorted_probs = paddle.sort(probs, descending=True)
 
     cumulative_probs = paddle.cumsum(sorted_probs, axis=-1)