Add MarkTrtEngineOutputs API #56188

Merged 7 commits on Aug 17, 2023
4 changes: 4 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
@@ -240,6 +240,10 @@ struct Argument {
DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int);
DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int64_t);
DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
DECL_ARGUMENT_FIELD(trt_mark_output, TRTMarkOutput, bool);
DECL_ARGUMENT_FIELD(trt_output_tensor_names,
TRTOutputTensorNames,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(tensorrt_disabled_ops,
TensorRtDisabledOPs,
std::vector<std::string>);
4 changes: 4 additions & 0 deletions paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -160,6 +160,10 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
pass->Set("min_subgraph_size",
new int(argument->tensorrt_min_subgraph_size()));
pass->Set("mark_output", new bool(argument->trt_mark_output()));
pass->Set(
"output_tensor_names",
new std::vector<std::string>(argument->trt_output_tensor_names()));
pass->Set("program",
new framework::ProgramDesc *(&argument->main_program()));
pass->Set("predictor_id", new int(argument->predictor_id()));
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -371,6 +371,40 @@ std::string TensorRtSubgraphPass::CreateTensorRTOp(
// record the origin output data type
std::vector<int> origin_outputs_dtype;
std::map<std::string, int> map_origin_outputs_dtype;

// Whether to mark the outputs of the TensorRT subgraph
auto mark_output = Get<bool>("mark_output");
auto output_tensor_name =
Get<std::vector<std::string>>("output_tensor_names");
VLOG(1) << "mark Output: " << mark_output;

if (mark_output) {
VLOG(1) << "begin to mark output ...";
for (auto node : subgraph) {
if (node->NodeType() == Node::Type::kOperation) {
if (node->Op()->Outputs().count("Xshape")) continue;
for (auto *x : node->outputs) {
if (std::count(parameters.begin(), parameters.end(), x->Name()) > 0)
continue;
if (!output_tensor_name.empty() &&
std::count(output_tensor_name.begin(),
output_tensor_name.end(),
x->Name())) {
VLOG(1) << "output " << x->Name() << " has been marked";
std::string output_name_withid =
x->Name() + std::to_string(x->id());
output_names.insert(x->Name());
output_names_with_id.insert(output_name_withid);
origin_name_output_rank[x->Name()] = x->Var()->GetShape().size();
trt_outputs.insert(x);
map_origin_outputs_dtype[x->Name()] =
static_cast<int>(x->Var()->GetDataType());
}
}
}
}
}

for (auto *x : node->outputs) {
output_names.insert(x->Name());
output_names_with_id.insert(x->Name() + std::to_string(x->id()));
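The marking logic above applies three filters before a tensor from the subgraph is kept as an extra engine output: outputs of ops that produce an "Xshape" tensor are skipped, parameters (weights) are skipped, and the tensor's name must appear in the user-supplied list. A minimal standalone sketch of that decision, using a hypothetical ShouldMarkAsOutput helper that is not part of the PR:

```cpp
#include <algorithm>
#include <string>
#include <vector>

// Hypothetical helper (not in the PR) restating the filter used in
// CreateTensorRTOp above: a tensor is marked as an extra engine output only
// if it is not a parameter and its name was passed to MarkTrtEngineOutputs.
bool ShouldMarkAsOutput(const std::string &tensor_name,
                        const std::vector<std::string> &parameters,
                        const std::vector<std::string> &output_tensor_names) {
  if (std::count(parameters.begin(), parameters.end(), tensor_name) > 0) {
    return false;  // persistable weights are never marked
  }
  return !output_tensor_names.empty() &&
         std::count(output_tensor_names.begin(),
                    output_tensor_names.end(),
                    tensor_name) > 0;
}
```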
10 changes: 10 additions & 0 deletions paddle/fluid/inference/api/analysis_config.cc
@@ -459,6 +459,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(tensorrt_max_batchsize_);
CP_MEMBER(tensorrt_min_subgraph_size_);
CP_MEMBER(tensorrt_precision_mode_);
CP_MEMBER(trt_mark_output_);
CP_MEMBER(trt_output_tensor_names_);
CP_MEMBER(trt_disabled_ops_);
CP_MEMBER(trt_use_dla_);
CP_MEMBER(trt_dla_core_);
@@ -757,6 +759,12 @@ void AnalysisConfig::EnableTensorRtEngine(int64_t workspace_size,
#endif
}

void AnalysisConfig::MarkTrtEngineOutputs(
const std::vector<std::string> &output_tensor_names) {
trt_mark_output_ = true;
trt_output_tensor_names_ = output_tensor_names;
}

void AnalysisConfig::EnableTensorRTMemoryOptim(bool engine_memory_sharing,
int sharing_identifier) {
PADDLE_ENFORCE_EQ(
@@ -1050,6 +1058,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << tensorrt_workspace_size_;
ss << tensorrt_max_batchsize_;
ss << tensorrt_min_subgraph_size_;
ss << trt_mark_output_;

ss << use_dlnne_;
ss << dlnne_min_subgraph_size_;
@@ -1331,6 +1340,7 @@ std::string AnalysisConfig::Summary() {
}
os.InsertRow({"trt_engine_memory_sharing",
trt_engine_memory_sharing_ ? "true" : "false"});
os.InsertRow({"trt_mark_output", trt_mark_output_ ? "true" : "false"});
#endif
}
}
8 changes: 4 additions & 4 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -1386,6 +1386,8 @@ void AnalysisPredictor::PrepareArgument() {
argument_->SetTensorRtWorkspaceSize(config_.tensorrt_workspace_size_);
argument_->SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_);
argument_->SetTensorRtMinSubgraphSize(config_.tensorrt_min_subgraph_size_);
argument_->SetTRTMarkOutput(config_.trt_mark_output_);
argument_->SetTRTOutputTensorNames(config_.trt_output_tensor_names_);
argument_->SetTensorRtDisabledOPs(config_.trt_disabled_ops_);
argument_->SetTensorRtUseDLA(config_.trt_use_dla_);
argument_->SetTensorRtDLACore(config_.trt_dla_core_);
@@ -2695,8 +2697,7 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
}

void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) {
static std::once_flag register_input_hook_flag;
std::call_once(register_input_hook_flag, [this] {
std::call_once(register_input_hook_flag_, [this] {
executor_->RegisterInputHook(
[this](framework::OperatorBase *op, framework::Scope *scope) {
for (auto &input : op->Inputs()) {
@@ -2719,8 +2720,7 @@ void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) {

void AnalysisPredictor::RegisterOutputHook(
const OutputTensorHookFunc &hookfunc) {
static std::once_flag register_output_hook_flag;
std::call_once(register_output_hook_flag, [this] {
std::call_once(register_output_hook_flag_, [this] {
executor_->RegisterOutputHook(
[this](framework::OperatorBase *op, framework::Scope *scope) {
for (auto &output : op->Outputs()) {
2 changes: 2 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.h
@@ -571,6 +571,8 @@ class AnalysisPredictor : public PaddlePredictor {
std::map<size_t, std::string> idx2feeds_;
std::vector<framework::OpDesc *> fetches_;
std::map<size_t, std::string> idx2fetches_;
std::once_flag register_input_hook_flag_;
std::once_flag register_output_hook_flag_;

phi::DataType model_precision_{phi::DataType::FLOAT32};

9 changes: 9 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -690,6 +690,13 @@ struct PD_INFER_DECL AnalysisConfig {
///
bool tensorrt_engine_enabled() const { return use_tensorrt_; }
///
/// \brief Mark intermediate tensors of the TensorRT engine as engine outputs.
///
/// \param output_tensor_names The names of the tensors that need to be marked.
///
void MarkTrtEngineOutputs(
const std::vector<std::string>& output_tensor_names = {});
///
/// \brief Turn on the TensorRT memory optimization.
///
/// \param engine_memory_sharing Whether to enable TensorRT memory
@@ -1204,6 +1211,8 @@ struct PD_INFER_DECL AnalysisConfig {
bool trt_use_cuda_graph_{false};
bool trt_use_varseqlen_{false};
bool trt_with_interleaved_{false};
bool trt_mark_output_{false};
std::vector<std::string> trt_output_tensor_names_{};
std::string tensorrt_transformer_posid_{""};
std::string tensorrt_transformer_maskid_{""};
bool trt_use_dla_{false};
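For reference, a minimal caller-side sketch of the new API, mirroring the C++ test added later in this PR; the model directory and tensor names are placeholders, and the include path may differ per installation:

```cpp
#include "paddle_inference_api.h"  // include path depends on the install layout

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./resnet50");  // placeholder model directory
  config.EnableUseGpu(100, 0);
  config.EnableTensorRtEngine(
      1 << 30, 1, 5, paddle::AnalysisConfig::Precision::kFloat32, false, false);

  // Keep these intermediate tensors as outputs of the generated TRT engine.
  config.MarkTrtEngineOutputs({"fc_0.tmp_0", "fc_0.tmp_1"});

  auto predictor = paddle::CreatePaddlePredictor(config);
  // ... set inputs and call predictor->Run(...) as in any other inference demo.
  return 0;
}
```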
3 changes: 3 additions & 0 deletions paddle/fluid/pybind/inference_api.cc
@@ -871,6 +871,9 @@ void BindAnalysisConfig(py::module *m) {
py::arg("disable_trt_plugin_fp16") = false)
.def("tensorrt_dynamic_shape_enabled",
&AnalysisConfig::tensorrt_dynamic_shape_enabled)
.def("mark_trt_engine_outputs",
&AnalysisConfig::MarkTrtEngineOutputs,
py::arg("output_tensor_names") = std::vector<std::string>({}))
.def("enable_tensorrt_varseqlen", &AnalysisConfig::EnableVarseqlen)
.def("tensorrt_varseqlen_enabled",
&AnalysisConfig::tensorrt_varseqlen_enabled)
10 changes: 10 additions & 0 deletions test/cpp/inference/api/CMakeLists.txt
@@ -978,6 +978,14 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST)
paddle_inference_shared
ARGS
--infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
inference_analysis_test(
trt_mark_trt_engine_outputs_test
SRCS
trt_mark_trt_engine_outputs_test.cc
EXTRA_DEPS
paddle_inference_shared
ARGS
--infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
inference_analysis_test(
trt_fc_prelu_test
SRCS
@@ -1370,6 +1378,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST)
set_tests_properties(test_trt_dynamic_shape_ernie_fp16_ser_deser
PROPERTIES TIMEOUT 300)
set_tests_properties(test_trt_dynamic_shape_ernie PROPERTIES TIMEOUT 480)
set_tests_properties(trt_mark_trt_engine_outputs_test PROPERTIES TIMEOUT
300)
endif()

if(WITH_MKLDNN)
45 changes: 45 additions & 0 deletions test/cpp/inference/api/trt_mark_trt_engine_outputs_test.cc
@@ -0,0 +1,45 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <glog/logging.h>
#include <gtest/gtest.h>

#include "gflags/gflags.h"
#include "test/cpp/inference/api/trt_test_helper.h"

namespace paddle {
namespace inference {

TEST(TensorRT, mark_trt_engine_outputs) {
std::string model_dir = FLAGS_infer_model + "/resnet50";
AnalysisConfig config;
config.SetModel(model_dir);
config.EnableUseGpu(100, 0);
config.EnableTensorRtEngine(
1 << 30, 1, 5, AnalysisConfig::Precision::kFloat32, false, false);
// The names of the tensors that need to be marked as TRT engine outputs;
// the default is an empty list.
std::vector<std::string> markOutput = {"fc_0.tmp_0", "fc_0.tmp_1"};
config.MarkTrtEngineOutputs(markOutput);

std::vector<std::vector<PaddleTensor>> inputs_all;
auto predictor = CreatePaddlePredictor(config);
SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");

std::vector<PaddleTensor> outputs;
for (auto &input : inputs_all) {
ASSERT_TRUE(predictor->Run(input, &outputs));
predictor->ClearIntermediateTensor();
}
}

} // namespace inference
} // namespace paddle