
Commit

fix codestyle
zhink committed Jun 12, 2024
1 parent 8d67d0f commit 3f2e711
Showing 10 changed files with 35 additions and 12 deletions.
16 changes: 10 additions & 6 deletions llm/predictor.py
@@ -652,7 +652,7 @@ def _create_predictor(self, predictor_args: PredictorArgument):
config.enable_custom_device(predictor_args.device, device_id)
elif predictor_args.device == "xpu":
raise ValueError(
"you should export xpu static model with --block_attn flag and use predictor with --block_attn too"
"you should export xpu static model with --block_attn flag and use predictor with --block_attn too"
"https://github.com/PaddlePaddle/PaddleNLP/blob/develop/llm/docs/inference.md"
)
else:
@@ -925,7 +925,9 @@ def _preprocess(self, source):
source = [self.tokenizer.apply_chat_template(sentence, tokenize=False) for sentence in source]

for i, text in enumerate(source):
add_special_tokens = self.tokenizer.chat_template is None or isinstance(self.tokenizer, (ChatGLMv2Tokenizer, ChatGLMTokenizer))
add_special_tokens = self.tokenizer.chat_template is None or isinstance(
self.tokenizer, (ChatGLMv2Tokenizer, ChatGLMTokenizer)
)
add_special_tokens = add_special_tokens if not self.benchmark else False
tokens = self.tokenizer(
text,
@@ -1087,8 +1089,8 @@ def _create_predictor(self, predictor_args: PredictorArgument):
config.set_xpu_device_id(device_id)
xpu_config = paddle.inference.XpuConfig()
xpu_config.device_id = device_id
xpu_config.l3_size = 63*1024*1024
xpu_config.l3_autotune_size = 63*1024*1024
xpu_config.l3_size = 63 * 1024 * 1024
xpu_config.l3_autotune_size = 63 * 1024 * 1024
config.set_xpu_config(xpu_config)
config.enable_new_executor()
else:
@@ -1348,7 +1350,7 @@ def create_predictor(
else:
if predictor_args.device == "xpu":
raise ValueError(
"you should run xpu dynamic model with --block_attn flag"
"you should run xpu dynamic model with --block_attn flag"
"https://github.com/PaddlePaddle/PaddleNLP/blob/develop/llm/docs/inference.md"
)
from paddlenlp.experimental.transformers import (
@@ -1608,7 +1610,9 @@ def predict():

def benchmark(predictor, predictor_args, model_args):
# Just construct a simple benchmark input. We pad input to the src_length.
benchmark_texts = [predictor.tokenizer.pad_token * predictor_args.src_length for _ in range(predictor_args.batch_size)]
benchmark_texts = [
predictor.tokenizer.pad_token * predictor_args.src_length for _ in range(predictor_args.batch_size)
]

batch_benchmark_texts = batchfy_text(benchmark_texts, predictor_args.batch_size)
print("***********Start Benchmark**********")
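
A note on the two ValueError messages in this file: the reformatting leaves the message text unchanged, and Python concatenates adjacent string literals, so the documentation URL is still appended directly after "...--block_attn too" without a separating space. A minimal sketch, not part of the commit:

    # Adjacent string literals inside the parentheses are concatenated at compile time.
    msg = (
        "you should export xpu static model with --block_attn flag and use predictor with --block_attn too"
        "https://github.com/PaddlePaddle/PaddleNLP/blob/develop/llm/docs/inference.md"
    )
    print(msg)  # "...--block_attn too" runs directly into "https://..."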
2 changes: 2 additions & 0 deletions paddlenlp/experimental/transformers/bloom/modeling.py
@@ -219,6 +219,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
token_num = paddle.sum(seq_lens_this_time)
from paddlenlp_ops import get_padding_offset

ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
input_ids, cum_offsets_now, token_num, seq_lens_this_time
)
@@ -593,6 +594,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
cum_offsets_now = paddle.cumsum(self.max_seq_len - seq_lens_this_time)
token_num = paddle.sum(seq_lens_this_time)
from paddlenlp_ops import get_padding_offset_v2

ids_remove_padding, cum_offsets, padding_offset, cu_seqlens_q, cu_seqlens_k = get_padding_offset_v2(
input_ids, cum_offsets_now, token_num, seq_lens_this_time
)
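
The additions in this file are the blank lines after the two local paddlenlp_ops imports. For readers new to the remove_padding helpers, the cumulative-offset arithmetic can be illustrated with a small sketch that assumes only stock paddle (the custom get_padding_offset kernels are not reproduced here):

    import paddle

    # Token counts for a batch of 3 sequences that are padded to the batch maximum (3).
    seq_lens_this_time = paddle.to_tensor([3, 1, 2])
    # Running total of padding slots through each row.
    cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
    token_num = paddle.sum(seq_lens_this_time)
    print(cum_offsets_now.numpy())  # [0 2 3]
    print(int(token_num))           # 6 real tokens for get_padding_offset to keep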
1 change: 1 addition & 0 deletions paddlenlp/experimental/transformers/chatglm/modeling.py
@@ -273,6 +273,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
token_num = paddle.sum(seq_lens_this_time)
from paddlenlp_ops import get_padding_offset

ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
input_ids, cum_offsets_now, token_num, seq_lens_this_time
)
1 change: 1 addition & 0 deletions paddlenlp/experimental/transformers/chatglm_v2/modeling.py
@@ -202,6 +202,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
token_num = paddle.sum(seq_lens_this_time)
from paddlenlp_ops import get_padding_offset

ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
input_ids, cum_offsets_now, token_num, seq_lens_this_time
)
paddlenlp/experimental/transformers/fused_transformer_layers.py
@@ -15,7 +15,7 @@

import paddle
import paddle.distributed as dist
from paddle.framework import LayerHelper, in_dynamic_mode, core
from paddle.framework import LayerHelper, core, in_dynamic_mode

from paddle.incubate.nn.functional import (
fused_layer_norm,
fused_rms_norm,
@@ -25,11 +25,10 @@
from paddle.nn import Layer
from paddle.nn.initializer import Constant
from paddle.nn.quant import weight_only_linear
from paddlenlp_ops import rebuild_padding_v2

from paddlenlp.utils.import_utils import is_paddlenlp_ops_available
from paddlenlp.utils.log import logger
from paddlenlp_ops import rebuild_padding_v2


if not is_paddlenlp_ops_available():
logger.warning(

@@ -1350,8 +1349,8 @@ class FusedBlockMultiTransformer(FusedMultiTransformerBase):
def __init__(self, config: FusedMultiTransformerConfig):
super().__init__(config)
if not core.is_compiled_with_cuda():
self.cache_k_per_batch_maxs = paddle.full(shape=[10, 6], fill_value=0, dtype='float32')
self.cache_v_per_batch_maxs = paddle.full(shape=[10, 6], fill_value=0, dtype='float32')
self.cache_k_per_batch_maxs = paddle.full(shape=[10, 6], fill_value=0, dtype="float32")
self.cache_v_per_batch_maxs = paddle.full(shape=[10, 6], fill_value=0, dtype="float32")

def compute_attn(
self,
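
Besides the import reordering at the top of this file, the FusedBlockMultiTransformer change only switches 'float32' to "float32"; both spellings name the same dtype. A minimal sketch assuming only that paddle is installed (not part of the commit):

    import paddle

    # paddle.full accepts the dtype as a string; quote style does not matter.
    cache_k_per_batch_maxs = paddle.full(shape=[10, 6], fill_value=0, dtype="float32")
    print(cache_k_per_batch_maxs.shape)  # [10, 6]
    print(cache_k_per_batch_maxs.dtype)  # paddle.float32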
11 changes: 10 additions & 1 deletion paddlenlp/experimental/transformers/generation_utils.py
@@ -198,6 +198,7 @@ def update_model_kwargs_for_generation(self, cache, just_decoder, next_tokens, e
if cache is None:
next_tokens = paddle.where(just_decoder, paddle.full_like(next_tokens, -1), next_tokens)
from paddlenlp_ops import set_stop_value_multi_ends

next_tokens, model_kwargs["stop_flags"] = set_stop_value_multi_ends(
next_tokens, model_kwargs["stop_flags"], eos_token_id, 2
) # multi ends
@@ -296,6 +297,7 @@ def _post_process_(outputs, top_p, temperature, step_idx_ori, model_kwargs):
else:
step_idx = model_kwargs["step_idx"]
from paddlenlp_ops import set_value_by_flags_and_idx

model_kwargs["stop_flags"] = set_value_by_flags_and_idx(
model_kwargs["pre_ids"],
model_kwargs["tgt_ids"],
@@ -308,6 +310,7 @@ def _post_process_(outputs, top_p, temperature, step_idx_ori, model_kwargs):
logits = logits_processors(model_kwargs["all_input_ids"], logits, decoding_step=step_idx_ori)

from paddlenlp_ops import get_token_penalty_multi_scores

logits = get_token_penalty_multi_scores(
model_kwargs["pre_ids"],
logits,
@@ -319,7 +322,7 @@ def _post_process_(outputs, top_p, temperature, step_idx_ori, model_kwargs):
eos_token_id,
)
logits = logits / temperature

# sample
probs = F.softmax(logits)

@@ -340,6 +343,7 @@ def _post_process_(outputs, top_p, temperature, step_idx_ori, model_kwargs):
model_kwargs["all_input_ids"] = paddle.concat([model_kwargs["all_input_ids"], next_tokens], axis=1)

from paddlenlp_ops import save_with_output

save_with_output(
next_tokens,
batch_idx,
@@ -629,6 +633,7 @@ def _post_process_(
):
step_idx = model_kwargs["step_idx"]
from paddlenlp_ops import set_value_by_flags_and_idx_v2

set_value_by_flags_and_idx_v2(
model_kwargs["pre_ids"],
model_kwargs["input_ids"],
@@ -643,6 +648,7 @@ def _post_process_(

# pre-process distribution
from paddlenlp_ops import get_token_penalty_multi_scores_v2

logits = get_token_penalty_multi_scores_v2(
model_kwargs["pre_ids"],
logits,
@@ -669,12 +675,14 @@ def _post_process_(
length_cond = paddle.greater_equal(step_idx, model_kwargs["max_dec_len"])
stop_flags = paddle.logical_or(model_kwargs["stop_flags"], length_cond)
from paddlenlp_ops import set_stop_value_multi_ends_v2

set_stop_value_multi_ends_v2(
next_tokens, stop_flags, model_kwargs["seq_lens_this_time"], eos_token_id, model_kwargs["next_tokens"]
) # multi ends
paddle.assign(stop_flags, model_kwargs["stop_flags"])
# update inputs
from paddlenlp_ops import update_inputs

update_inputs(
stop_flags,
model_kwargs["not_need_stop"],
Expand All @@ -687,6 +695,7 @@ def _post_process_(
model_kwargs["is_block_step"],
)
from paddlenlp_ops import save_output

save_output(next_tokens, model_kwargs["not_need_stop"], self.config.tensor_parallel_rank)
return next_tokens

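
Most of the edits in generation_utils.py add blank lines after the local paddlenlp_ops imports; the stop-condition logic itself is unchanged. As a hedged illustration of the length-based stop-flag update near the end of _post_process_, the sketch below uses only stock paddle and stands in for the custom set_stop_value_multi_ends_v2 kernel, which is not reproduced here:

    import paddle

    step_idx = paddle.to_tensor([5, 2, 7])                # current decoding step per sequence
    max_dec_len = paddle.to_tensor([6, 6, 6])             # per-sequence generation limit
    stop_flags = paddle.to_tensor([False, True, False])   # sequence 1 has already finished

    # A sequence stops once it reaches its max_dec_len or was already stopped.
    length_cond = paddle.greater_equal(step_idx, max_dec_len)
    stop_flags = paddle.logical_or(stop_flags, length_cond)
    print(stop_flags.numpy())  # [False  True  True]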
1 change: 1 addition & 0 deletions paddlenlp/experimental/transformers/gpt/modeling.py
@@ -203,6 +203,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
token_num = paddle.sum(seq_lens_this_time)
from paddlenlp_ops import get_padding_offset

ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
input_ids, cum_offsets_now, token_num, seq_lens_this_time
)
3 changes: 3 additions & 0 deletions paddlenlp/experimental/transformers/llama/modeling.py
@@ -346,6 +346,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
token_num = paddle.sum(seq_lens_this_time)
from paddlenlp_ops import get_padding_offset

ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
input_ids, cum_offsets_now, token_num, seq_lens_this_time
)
@@ -433,6 +434,7 @@ def forward(
if not is_decoder and pre_caches is not None:
position_offset = 128
from paddlenlp_ops import fused_get_rotary_embedding

new_rope = fused_get_rotary_embedding(
input_ids, position_ids, self.head_dim_shape_tensor, position_offset, theta, True
)
@@ -825,6 +827,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
cum_offsets_now = paddle.cumsum(self.max_seq_len - seq_lens_this_time)
token_num = paddle.sum(seq_lens_this_time)
from paddlenlp_ops import get_padding_offset_v2

ids_remove_padding, cum_offsets, padding_offset, cu_seqlens_q, cu_seqlens_k = get_padding_offset_v2(
input_ids, cum_offsets_now, token_num, seq_lens_this_time
)
1 change: 1 addition & 0 deletions paddlenlp/experimental/transformers/opt/modeling.py
@@ -147,6 +147,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
token_num = paddle.sum(seq_lens_this_time)
from paddlenlp_ops import get_padding_offset

ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
input_ids, cum_offsets_now, token_num, seq_lens_this_time
)
2 changes: 2 additions & 0 deletions paddlenlp/experimental/transformers/qwen/modeling.py
@@ -239,6 +239,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
token_num = paddle.sum(seq_lens_this_time)
from paddlenlp_ops import get_padding_offset

ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
input_ids, cum_offsets_now, token_num, seq_lens_this_time
)
@@ -325,6 +326,7 @@ def forward(
position_offset = 128

from paddlenlp_ops import fused_get_rotary_embedding

new_rope = fused_get_rotary_embedding(
input_ids, position_ids, self.head_dim_shape_tensor, position_offset, theta, True
)
