[PIR] fix pir open bugs #9248

Merged 1 commit on Oct 12, 2024
13 changes: 9 additions & 4 deletions llm/predict/predictor.py
@@ -24,7 +24,7 @@
 import numpy as np
 import paddle
 import paddle.incubate.multiprocessing as mp
-from paddle.base.framework import in_cinn_mode, in_pir_executor_mode
+from paddle.base.framework import in_cinn_mode, in_pir_executor_mode, use_pir_api
 from paddle.distributed import fleet

 from paddlenlp.generation import GenerationConfig, TextIteratorStreamer
@@ -624,8 +624,10 @@ def _create_predictor(self, predictor_args: PredictorArgument):
         infer_model_path = llm_utils.get_infer_model_path(
             predictor_args.model_name_or_path, predictor_args.model_prefix
         )

-        config = paddle.inference.Config(infer_model_path + ".pdmodel", infer_model_path + ".pdiparams")
+        if use_pir_api():
+            config = paddle.inference.Config(infer_model_path + ".json", infer_model_path + ".pdiparams")
+        else:
+            config = paddle.inference.Config(infer_model_path + ".pdmodel", infer_model_path + ".pdiparams")

         config.switch_ir_optim(True)
         # remove `gpu_cpu_map_matmul_v2_to_matmul_pass` to avoid mapping matmul_v2 -> matmul op
@@ -1057,7 +1059,10 @@ def _create_predictor(self, predictor_args: PredictorArgument):
             predictor_args.model_name_or_path, predictor_args.model_prefix
         )

-        config = paddle.inference.Config(infer_model_path + ".pdmodel", infer_model_path + ".pdiparams")
+        if use_pir_api():
+            config = paddle.inference.Config(infer_model_path + ".json", infer_model_path + ".pdiparams")
+        else:
+            config = paddle.inference.Config(infer_model_path + ".pdmodel", infer_model_path + ".pdiparams")

         config.switch_ir_optim(False)
         if predictor_args.device in paddle.device.get_all_custom_device_type():
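Note on the predictor change: it keys the exported-program filename off the runtime IR mode. With the PIR API enabled, Paddle serializes the inference program as a `.json` file, while the legacy program format uses `.pdmodel`; the parameters file stays `.pdiparams` in both cases. A minimal standalone sketch of the same selection, assuming an inference model has already been exported under the matching mode (the `./inference/model` prefix is hypothetical):

```python
import paddle
from paddle.base.framework import use_pir_api


def build_inference_config(model_prefix: str) -> paddle.inference.Config:
    # Under PIR the exported program is <prefix>.json; the legacy format is <prefix>.pdmodel.
    program_suffix = ".json" if use_pir_api() else ".pdmodel"
    return paddle.inference.Config(model_prefix + program_suffix, model_prefix + ".pdiparams")


config = build_inference_config("./inference/model")  # hypothetical export prefix
config.switch_ir_optim(True)
```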
10 changes: 5 additions & 5 deletions paddlenlp/experimental/transformers/chatglm/modeling.py
@@ -294,19 +294,19 @@
         time_step=None,
         **kwargs,
     ):
+        is_decoder = cache is not None
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
             batch_size, seq_length = input_ids.shape[:2]
         elif inputs_embeds is not None:
-            batch_size, seq_length, _ = inputs_embeds.shape[:2]
+            batch_size, seq_length, _ = inputs_embeds.shape[:3]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")

-        encode_seq_length = input_ids.shape[1]
-        seq_lens = seq_len_decoder if encode_seq_length == 1 else seq_len_encoder
+        seq_lens = seq_len_decoder if is_decoder else seq_len_encoder

-        if encode_seq_length > 1:
+        if not is_decoder:
             ids_remove_padding, padding_offset, cum_offsets = self.remove_padding(input_ids, seq_len_encoder)
         else:
             ids_remove_padding = input_ids
@@ -354,7 +354,7 @@
         hidden_states = self.input_layernorm(hidden_states)

         position_offset = 0
-        if encode_seq_length > 1 and pre_caches is not None:
+        if not is_decoder and pre_caches is not None:
             position_offset = 128

         with dy2st_nocheck_guard_context():
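Note on the modeling change: it replaces the `encode_seq_length` heuristic (which read `input_ids.shape[1]` even on the `inputs_embeds`-only path) with an explicit `is_decoder = cache is not None` flag, so the prefill/decode split is decided by whether a KV cache already exists rather than by sequence length. A minimal sketch of that branching pattern, with toy stand-ins for the real prefill and decode kernels:

```python
from typing import List, Optional


def forward_step(token_ids: List[int], cache: Optional[List[int]] = None) -> List[int]:
    # Decode phase iff a cache from an earlier step already exists.
    is_decoder = cache is not None
    if not is_decoder:
        # Prefill: consume the whole prompt and build the cache from scratch.
        return list(token_ids)
    # Decode: keep the existing cache and append only the newest token.
    return cache + token_ids[-1:]


cache = forward_step([1, 2, 3])           # prefill over the prompt
cache = forward_step([4], cache=cache)    # single-token decode step
```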