Add MarkTrtEngineOutputs API (#56188)
* [paddle-TRT] support marking outputs

* [fix bug] hook function was only registered once across different predictors

* add api test
ming1753 authored Aug 17, 2023
1 parent df445c1 commit 2abf432
Showing 10 changed files with 125 additions and 4 deletions.
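For context, a minimal usage sketch of the API this commit adds (illustrative only: the include path, model directory, and tensor names are assumptions mirroring the new test at the end of this diff):

// Sketch: enable paddle-TRT and mark two intermediate tensors as engine
// outputs. Include path, model path, and tensor names are placeholders.
#include "paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./resnet50");  // assumed model directory
  config.EnableUseGpu(100, 0);
  config.EnableTensorRtEngine(
      1 << 30, 1, 5, paddle::AnalysisConfig::Precision::kFloat32, false, false);
  config.MarkTrtEngineOutputs({"fc_0.tmp_0", "fc_0.tmp_1"});  // new API
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}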
4 changes: 4 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
@@ -240,6 +240,10 @@ struct Argument {
DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int);
DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int64_t);
DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
DECL_ARGUMENT_FIELD(trt_mark_output, TRTMarkOutput, bool);
DECL_ARGUMENT_FIELD(trt_output_tensor_names,
TRTOutputTensorNames,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(tensorrt_disabled_ops,
TensorRtDisabledOPs,
std::vector<std::string>);
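For readers unfamiliar with the macro, a rough sketch of what a DECL_ARGUMENT_FIELD-style declaration typically expands to (an illustrative approximation, not the actual Paddle macro): the two new fields give Argument a trt_mark_output flag and a trt_output_tensor_names list, each with a typed getter and a SetXxx setter used by the predictor changes below.

// Illustrative approximation of a field-declaring macro; not the real macro.
#include <string>
#include <vector>

#define DECL_FIELD(field__, Field, type__)                \
 public:                                                   \
  const type__& field__() const { return field__##_; }     \
  void Set##Field(const type__& v) { field__##_ = v; }     \
                                                            \
 private:                                                   \
  type__ field__##_{};

struct ArgumentSketch {
  DECL_FIELD(trt_mark_output, TRTMarkOutput, bool)
  DECL_FIELD(trt_output_tensor_names, TRTOutputTensorNames,
             std::vector<std::string>)
};

int main() {
  ArgumentSketch arg;
  arg.SetTRTMarkOutput(true);
  arg.SetTRTOutputTensorNames({"fc_0.tmp_0", "fc_0.tmp_1"});
  return arg.trt_mark_output() && arg.trt_output_tensor_names().size() == 2
             ? 0
             : 1;
}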
4 changes: 4 additions & 0 deletions paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -160,6 +160,10 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
pass->Set("min_subgraph_size",
new int(argument->tensorrt_min_subgraph_size()));
pass->Set("mark_output", new bool(argument->trt_mark_output()));
pass->Set(
"output_tensor_names",
new std::vector<std::string>(argument->trt_output_tensor_names()));
pass->Set("program",
new framework::ProgramDesc *(&argument->main_program()));
pass->Set("predictor_id", new int(argument->predictor_id()));
@@ -371,6 +371,40 @@ std::string TensorRtSubgraphPass::CreateTensorRTOp(
// record the origin output data type
std::vector<int> origin_outputs_dtype;
std::map<std::string, int> map_origin_outputs_dtype;

  // Whether to mark the outputs
  auto mark_output = Get<bool>("mark_output");
  auto output_tensor_name =
      Get<std::vector<std::string>>("output_tensor_names");
  VLOG(1) << "mark output: " << mark_output;

  if (mark_output) {
    VLOG(1) << "begin to mark output ...";
    for (auto node : subgraph) {
      if (node->NodeType() == Node::Type::kOperation) {
        if (node->Op()->Outputs().count("Xshape")) continue;
        for (auto *x : node->outputs) {
          if (std::count(parameters.begin(), parameters.end(), x->Name()) > 0)
            continue;
          if (!output_tensor_name.empty() &&
              std::count(output_tensor_name.begin(),
                         output_tensor_name.end(),
                         x->Name())) {
            VLOG(1) << "output " << x->Name() << " has been marked";
            std::string output_name_withid =
                x->Name() + std::to_string(x->id());
            output_names.insert(x->Name());
            output_names_with_id.insert(output_name_withid);
            origin_name_output_rank[x->Name()] = x->Var()->GetShape().size();
            trt_outputs.insert(x);
            map_origin_outputs_dtype[x->Name()] =
                static_cast<int>(x->Var()->GetDataType());
          }
        }
      }
    }
  }

  for (auto *x : node->outputs) {
    output_names.insert(x->Name());
    output_names_with_id.insert(x->Name() + std::to_string(x->id()));
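Restated as a standalone predicate (an illustrative simplification, not the pass code itself): an op output inside the subgraph is promoted to an engine output only if it is not a parameter/weight and its name appears in the non-empty user-provided list; ops that produce an "Xshape" output are skipped entirely.

// Illustrative simplification of the per-output marking condition above.
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

bool ShouldMarkAsTrtOutput(const std::string& name,
                           const std::vector<std::string>& requested,
                           const std::vector<std::string>& parameters) {
  if (std::count(parameters.begin(), parameters.end(), name) > 0)
    return false;  // weights/parameters are never marked
  return !requested.empty() &&
         std::count(requested.begin(), requested.end(), name) > 0;
}

int main() {
  std::vector<std::string> requested = {"fc_0.tmp_0"};
  std::vector<std::string> parameters = {"conv1_weights"};
  assert(ShouldMarkAsTrtOutput("fc_0.tmp_0", requested, parameters));
  assert(!ShouldMarkAsTrtOutput("conv1_weights", requested, parameters));
  assert(!ShouldMarkAsTrtOutput("fc_0.tmp_0", {}, parameters));
  return 0;
}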
10 changes: 10 additions & 0 deletions paddle/fluid/inference/api/analysis_config.cc
@@ -459,6 +459,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(tensorrt_max_batchsize_);
CP_MEMBER(tensorrt_min_subgraph_size_);
CP_MEMBER(tensorrt_precision_mode_);
CP_MEMBER(trt_mark_output_);
CP_MEMBER(trt_output_tensor_names_);
CP_MEMBER(trt_disabled_ops_);
CP_MEMBER(trt_use_dla_);
CP_MEMBER(trt_dla_core_);
@@ -757,6 +759,12 @@ void AnalysisConfig::EnableTensorRtEngine(int64_t workspace_size,
#endif
}

void AnalysisConfig::MarkTrtEngineOutputs(
const std::vector<std::string> &output_tensor_names) {
trt_mark_output_ = true;
trt_output_tensor_names_ = output_tensor_names;
}

void AnalysisConfig::EnableTensorRTMemoryOptim(bool engine_memory_sharing,
int sharing_identifier) {
PADDLE_ENFORCE_EQ(
@@ -1050,6 +1058,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << tensorrt_workspace_size_;
ss << tensorrt_max_batchsize_;
ss << tensorrt_min_subgraph_size_;
ss << trt_mark_output_;

ss << use_dlnne_;
ss << dlnne_min_subgraph_size_;
Expand Down Expand Up @@ -1331,6 +1340,7 @@ std::string AnalysisConfig::Summary() {
}
os.InsertRow({"trt_engine_memory_sharing",
trt_engine_memory_sharing_ ? "true" : "false"});
os.InsertRow({"trt_mark_output", trt_mark_output_ ? "true" : "false"});
#endif
}
}
8 changes: 4 additions & 4 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -1386,6 +1386,8 @@ void AnalysisPredictor::PrepareArgument() {
argument_->SetTensorRtWorkspaceSize(config_.tensorrt_workspace_size_);
argument_->SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_);
argument_->SetTensorRtMinSubgraphSize(config_.tensorrt_min_subgraph_size_);
argument_->SetTRTMarkOutput(config_.trt_mark_output_);
argument_->SetTRTOutputTensorNames(config_.trt_output_tensor_names_);
argument_->SetTensorRtDisabledOPs(config_.trt_disabled_ops_);
argument_->SetTensorRtUseDLA(config_.trt_use_dla_);
argument_->SetTensorRtDLACore(config_.trt_dla_core_);
@@ -2695,8 +2697,7 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
}

void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) {
-  static std::once_flag register_input_hook_flag;
-  std::call_once(register_input_hook_flag, [this] {
+  std::call_once(register_input_hook_flag_, [this] {
executor_->RegisterInputHook(
[this](framework::OperatorBase *op, framework::Scope *scope) {
for (auto &input : op->Inputs()) {
@@ -2719,8 +2720,7 @@ void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) {

void AnalysisPredictor::RegisterOutputHook(
const OutputTensorHookFunc &hookfunc) {
-  static std::once_flag register_output_hook_flag;
-  std::call_once(register_output_hook_flag, [this] {
+  std::call_once(register_output_hook_flag_, [this] {
executor_->RegisterOutputHook(
[this](framework::OperatorBase *op, framework::Scope *scope) {
for (auto &output : op->Outputs()) {
2 changes: 2 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.h
@@ -571,6 +571,8 @@ class AnalysisPredictor : public PaddlePredictor {
std::map<size_t, std::string> idx2feeds_;
std::vector<framework::OpDesc *> fetches_;
std::map<size_t, std::string> idx2fetches_;
std::once_flag register_input_hook_flag_;
std::once_flag register_output_hook_flag_;

phi::DataType model_precision_{phi::DataType::FLOAT32};

9 changes: 9 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -690,6 +690,13 @@ struct PD_INFER_DECL AnalysisConfig {
///
bool tensorrt_engine_enabled() const { return use_tensorrt_; }
///
/// \brief Mark the tensors with the given names as outputs of the TensorRT
/// engine, so that their intermediate results can be obtained.
///
/// \param output_tensor_names The names of the tensors to be marked.
///
void MarkTrtEngineOutputs(
const std::vector<std::string>& output_tensor_names = {});
///
/// \brief Turn on the TensorRT memory optimization.
///
/// \param engine_memory_sharing Whether to enable TensorRT memory
@@ -1204,6 +1211,8 @@ struct PD_INFER_DECL AnalysisConfig {
bool trt_use_cuda_graph_{false};
bool trt_use_varseqlen_{false};
bool trt_with_interleaved_{false};
bool trt_mark_output_{false};
std::vector<std::string> trt_output_tensor_names_{};
std::string tensorrt_transformer_posid_{""};
std::string tensorrt_transformer_maskid_{""};
bool trt_use_dla_{false};
3 changes: 3 additions & 0 deletions paddle/fluid/pybind/inference_api.cc
@@ -871,6 +871,9 @@ void BindAnalysisConfig(py::module *m) {
py::arg("disable_trt_plugin_fp16") = false)
.def("tensorrt_dynamic_shape_enabled",
&AnalysisConfig::tensorrt_dynamic_shape_enabled)
.def("mark_trt_engine_outputs",
&AnalysisConfig::MarkTrtEngineOutputs,
py::arg("output_tensor_names") = std::vector<std::string>({}))
.def("enable_tensorrt_varseqlen", &AnalysisConfig::EnableVarseqlen)
.def("tensorrt_varseqlen_enabled",
&AnalysisConfig::tensorrt_varseqlen_enabled)
10 changes: 10 additions & 0 deletions test/cpp/inference/api/CMakeLists.txt
@@ -978,6 +978,14 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST)
paddle_inference_shared
ARGS
--infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
inference_analysis_test(
trt_mark_trt_engine_outputs_test
SRCS
trt_mark_trt_engine_outputs_test.cc
EXTRA_DEPS
paddle_inference_shared
ARGS
--infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
inference_analysis_test(
trt_fc_prelu_test
SRCS
@@ -1370,6 +1378,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST)
set_tests_properties(test_trt_dynamic_shape_ernie_fp16_ser_deser
PROPERTIES TIMEOUT 300)
set_tests_properties(test_trt_dynamic_shape_ernie PROPERTIES TIMEOUT 480)
set_tests_properties(trt_mark_trt_engine_outputs_test PROPERTIES TIMEOUT
300)
endif()

if(WITH_MKLDNN)
45 changes: 45 additions & 0 deletions test/cpp/inference/api/trt_mark_trt_engine_outputs_test.cc
@@ -0,0 +1,45 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <glog/logging.h>
#include <gtest/gtest.h>

#include "gflags/gflags.h"
#include "test/cpp/inference/api/trt_test_helper.h"

namespace paddle {
namespace inference {

TEST(TensorRT, mark_trt_engine_outputs) {
  std::string model_dir = FLAGS_infer_model + "/resnet50";
  AnalysisConfig config;
  config.SetModel(model_dir);
  config.EnableUseGpu(100, 0);
  config.EnableTensorRtEngine(
      1 << 30, 1, 5, AnalysisConfig::Precision::kFloat32, false, false);
  // The names of the tensors to be marked; the default is an empty list
  // (mark all).
  std::vector<std::string> markOutput = {"fc_0.tmp_0", "fc_0.tmp_1"};
  config.MarkTrtEngineOutputs(markOutput);

  std::vector<std::vector<PaddleTensor>> inputs_all;
  auto predictor = CreatePaddlePredictor(config);
  SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");

  std::vector<PaddleTensor> outputs;
  for (auto &input : inputs_all) {
    ASSERT_TRUE(predictor->Run(input, &outputs));
    predictor->ClearIntermediateTensor();
  }
}

} // namespace inference
} // namespace paddle
