diff --git a/csrc/gpu/speculate_decoding_kernels/speculate_save_output.cc b/csrc/gpu/speculate_decoding_kernels/speculate_save_output.cc index 19ebd73c0fd7..b1f0fb292201 100644 --- a/csrc/gpu/speculate_decoding_kernels/speculate_save_output.cc +++ b/csrc/gpu/speculate_decoding_kernels/speculate_save_output.cc @@ -35,9 +35,9 @@ void SpeculateSaveWithOutputMsg(const paddle::Tensor& accept_tokens, int max_draft_tokens = accept_tokens.shape()[1]; - auto accept_tokens_cpu = accept_tokens.copy_to(paddle::CPUPlace(), true); - auto accept_num_cpu = accept_num.copy_to(paddle::CPUPlace(), true); - auto not_need_stop_cpu = not_need_stop.copy_to(paddle::CPUPlace(), true); + auto accept_tokens_cpu = accept_tokens.copy_to(paddle::CPUPlace(), false); + auto accept_num_cpu = accept_num.copy_to(paddle::CPUPlace(), false); + auto not_need_stop_cpu = not_need_stop.copy_to(paddle::CPUPlace(), false); int64_t *accept_tokens_data = accept_tokens_cpu.data(); int *accept_num_data = accept_num_cpu.data();