From 338897511b75454e4c058c476972e3da7e143d31 Mon Sep 17 00:00:00 2001 From: xiaoxiaoehehe001 Date: Tue, 13 Dec 2022 08:57:51 +0000 Subject: [PATCH 01/19] argument_ --- .../fluid/inference/api/analysis_predictor.cc | 226 +++++++++--------- .../fluid/inference/api/analysis_predictor.h | 2 +- 2 files changed, 115 insertions(+), 113 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 983506c7c02bcf..092bcf59551905 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1085,101 +1085,103 @@ bool AnalysisPredictor::GetFetch(std::vector *outputs, } void AnalysisPredictor::PrepareArgument() { - argument_.SetUseGPU(config_.use_gpu()); - argument_.SetUseFcPadding(config_.use_fc_padding()); - argument_.SetGPUDeviceId(config_.gpu_device_id()); - argument_.SetEnableIrOptim(config_.enable_ir_optim_); - argument_.SetEnableMemoryOptim(config_.enable_memory_optim()); - argument_.SetModelFromMemory(config_.model_from_memory_); + // Init std::unique_ptr argument_. + argument_.reset(new Argument); + argument_->SetUseGPU(config_.use_gpu()); + argument_->SetUseFcPadding(config_.use_fc_padding()); + argument_->SetGPUDeviceId(config_.gpu_device_id()); + argument_->SetEnableIrOptim(config_.enable_ir_optim_); + argument_->SetEnableMemoryOptim(config_.enable_memory_optim()); + argument_->SetModelFromMemory(config_.model_from_memory_); // Analyze inference_program - argument_.SetPredictorID(predictor_id_); - argument_.SetRootPredictorID(root_predictor_id_); - argument_.SetOptimCacheDir(config_.opt_cache_dir_); + argument_->SetPredictorID(predictor_id_); + argument_->SetRootPredictorID(root_predictor_id_); + argument_->SetOptimCacheDir(config_.opt_cache_dir_); if (!config_.model_dir().empty()) { - argument_.SetModelDir(config_.model_dir()); + argument_->SetModelDir(config_.model_dir()); } else { PADDLE_ENFORCE_EQ(config_.prog_file().empty(), false, platform::errors::PreconditionNotMet( "Either model_dir or prog_file should be set.")); - argument_.SetModelProgramPath(config_.prog_file()); - argument_.SetModelParamsPath(config_.params_file()); + argument_->SetModelProgramPath(config_.prog_file()); + argument_->SetModelParamsPath(config_.params_file()); } // For JITLayer - argument_.SetSkipLoadParams(config_.skip_load_params_); - - argument_.SetTensorRtPrecisionMode(config_.tensorrt_precision_mode_); - argument_.SetTensorRtUseOSS(config_.trt_use_varseqlen_); - argument_.SetTensorRtWithInterleaved(config_.trt_with_interleaved_); - argument_.SetTensorRtTransformerPosid(config_.tensorrt_transformer_posid_); - argument_.SetTensorRtTransformerMaskid(config_.tensorrt_transformer_maskid_); - argument_.SetMinInputShape(config_.min_input_shape_); - argument_.SetMaxInputShape(config_.max_input_shape_); - argument_.SetOptimInputShape(config_.optim_input_shape_); - argument_.SetTensorRtTunedDynamicShape( + argument_->SetSkipLoadParams(config_.skip_load_params_); + + argument_->SetTensorRtPrecisionMode(config_.tensorrt_precision_mode_); + argument_->SetTensorRtUseOSS(config_.trt_use_varseqlen_); + argument_->SetTensorRtWithInterleaved(config_.trt_with_interleaved_); + argument_->SetTensorRtTransformerPosid(config_.tensorrt_transformer_posid_); + argument_->SetTensorRtTransformerMaskid(config_.tensorrt_transformer_maskid_); + argument_->SetMinInputShape(config_.min_input_shape_); + argument_->SetMaxInputShape(config_.max_input_shape_); + 
argument_->SetOptimInputShape(config_.optim_input_shape_); + argument_->SetTensorRtTunedDynamicShape( config_.tuned_tensorrt_dynamic_shape()); if (config_.use_gpu() && config_.tensorrt_engine_enabled()) { LOG(INFO) << "TensorRT subgraph engine is enabled"; - argument_.SetUseTensorRT(true); - argument_.SetTensorRtWorkspaceSize(config_.tensorrt_workspace_size_); - argument_.SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_); - argument_.SetTensorRtMinSubgraphSize(config_.tensorrt_min_subgraph_size_); - argument_.SetTensorRtDisabledOPs(config_.trt_disabled_ops_); - argument_.SetTensorRtUseDLA(config_.trt_use_dla_); - argument_.SetTensorRtDLACore(config_.trt_dla_core_); - argument_.SetTensorRtUseStaticEngine(config_.trt_use_static_engine_); - argument_.SetTensorRtUseCalibMode(config_.trt_use_calib_mode_); - argument_.SetCloseTrtPluginFp16(config_.disable_trt_plugin_fp16_); - argument_.SetTensorRtShapeRangeInfoPath(config_.shape_range_info_path()); - argument_.SetTensorRtAllowBuildAtRuntime( + argument_->SetUseTensorRT(true); + argument_->SetTensorRtWorkspaceSize(config_.tensorrt_workspace_size_); + argument_->SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_); + argument_->SetTensorRtMinSubgraphSize(config_.tensorrt_min_subgraph_size_); + argument_->SetTensorRtDisabledOPs(config_.trt_disabled_ops_); + argument_->SetTensorRtUseDLA(config_.trt_use_dla_); + argument_->SetTensorRtDLACore(config_.trt_dla_core_); + argument_->SetTensorRtUseStaticEngine(config_.trt_use_static_engine_); + argument_->SetTensorRtUseCalibMode(config_.trt_use_calib_mode_); + argument_->SetCloseTrtPluginFp16(config_.disable_trt_plugin_fp16_); + argument_->SetTensorRtShapeRangeInfoPath(config_.shape_range_info_path()); + argument_->SetTensorRtAllowBuildAtRuntime( config_.trt_allow_build_at_runtime()); - argument_.SetTensorRtUseInspector(config_.trt_use_inspector_); - argument_.SetTrtEngineMemorySharing(config_.trt_engine_memory_sharing()); + argument_->SetTensorRtUseInspector(config_.trt_use_inspector_); + argument_->SetTrtEngineMemorySharing(config_.trt_engine_memory_sharing()); } if (config_.dlnne_enabled()) { LOG(INFO) << "Dlnne subgraph is enabled"; - argument_.SetUseDlnne(true); - argument_.SetDlnneMinSubgraphSize(config_.dlnne_min_subgraph_size_); - argument_.SetDlnneMaxBatchSize(config_.dlnne_max_batchsize_); - argument_.SetDlnneUseStaticBatch(config_.dlnne_use_static_batch_); - argument_.SetDlnneWeightShareMode(config_.dlnne_weight_share_mode_); - argument_.SetDlnneDisableNodesByOutputs( + argument_->SetUseDlnne(true); + argument_->SetDlnneMinSubgraphSize(config_.dlnne_min_subgraph_size_); + argument_->SetDlnneMaxBatchSize(config_.dlnne_max_batchsize_); + argument_->SetDlnneUseStaticBatch(config_.dlnne_use_static_batch_); + argument_->SetDlnneWeightShareMode(config_.dlnne_weight_share_mode_); + argument_->SetDlnneDisableNodesByOutputs( config_.dlnne_disable_nodes_by_outputs_); - argument_.SetDlnneInputShapeDict(config_.dlnne_input_shape_dict_); - argument_.SetDlnneUseCalibMode(config_.dlnne_use_calib_mode_); - argument_.SetDlnnePrecisionMode(config_.dlnne_precision_mode_); + argument_->SetDlnneInputShapeDict(config_.dlnne_input_shape_dict_); + argument_->SetDlnneUseCalibMode(config_.dlnne_use_calib_mode_); + argument_->SetDlnnePrecisionMode(config_.dlnne_precision_mode_); } if (config_.lite_engine_enabled()) { - argument_.SetCpuMathLibraryNumThreads( + argument_->SetCpuMathLibraryNumThreads( config_.cpu_math_library_num_threads()); - argument_.SetLitePrecisionMode(config_.lite_precision_mode_); - 
argument_.SetLitePassesFilter(config_.lite_passes_filter_); - argument_.SetLiteOpsFilter(config_.lite_ops_filter_); - argument_.SetLiteZeroCopy(config_.lite_zero_copy_); - argument_.SetUseXpu(config_.use_xpu_); - argument_.SetXpuL3WorkspaceSize(config_.xpu_l3_workspace_size_); - argument_.SetXpuLocked(config_.xpu_locked_); - argument_.SetXpuAutotune(config_.xpu_autotune_); - argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_); - argument_.SetXpuPrecision(config_.xpu_precision_); - argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_); - argument_.SetXpuDeviceId(config_.xpu_device_id_); - argument_.SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_); - argument_.SetUseOpenCL(config_.use_opencl_); + argument_->SetLitePrecisionMode(config_.lite_precision_mode_); + argument_->SetLitePassesFilter(config_.lite_passes_filter_); + argument_->SetLiteOpsFilter(config_.lite_ops_filter_); + argument_->SetLiteZeroCopy(config_.lite_zero_copy_); + argument_->SetUseXpu(config_.use_xpu_); + argument_->SetXpuL3WorkspaceSize(config_.xpu_l3_workspace_size_); + argument_->SetXpuLocked(config_.xpu_locked_); + argument_->SetXpuAutotune(config_.xpu_autotune_); + argument_->SetXpuAutotuneFile(config_.xpu_autotune_file_); + argument_->SetXpuPrecision(config_.xpu_precision_); + argument_->SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_); + argument_->SetXpuDeviceId(config_.xpu_device_id_); + argument_->SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_); + argument_->SetUseOpenCL(config_.use_opencl_); // NNAdapter related - argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter); - argument_.SetNNAdapterDeviceNames( + argument_->SetUseNNAdapter(config_.NNAdapter().use_nnadapter); + argument_->SetNNAdapterDeviceNames( config_.NNAdapter().nnadapter_device_names); - argument_.SetNNAdapterContextProperties( + argument_->SetNNAdapterContextProperties( config_.NNAdapter().nnadapter_context_properties); - argument_.SetNNAdapterModelCacheDir( + argument_->SetNNAdapterModelCacheDir( config_.NNAdapter().nnadapter_model_cache_dir); - argument_.SetNNAdapterSubgraphPartitionConfigBuffer( + argument_->SetNNAdapterSubgraphPartitionConfigBuffer( config_.NNAdapter().nnadapter_subgraph_partition_config_buffer); - argument_.SetNNAdapterSubgraphPartitionConfigPath( + argument_->SetNNAdapterSubgraphPartitionConfigPath( config_.NNAdapter().nnadapter_subgraph_partition_config_path); std::vector buffer_keys; std::vector> buffer_vals; @@ -1187,67 +1189,67 @@ void AnalysisPredictor::PrepareArgument() { buffer_keys.emplace_back(it.first); buffer_vals.emplace_back(it.second); } - argument_.SetNNAdapterModelCacheToken(buffer_keys); - argument_.SetNNAdapterModelCacheBuffer(buffer_vals); + argument_->SetNNAdapterModelCacheToken(buffer_keys); + argument_->SetNNAdapterModelCacheBuffer(buffer_vals); LOG(INFO) << "Lite subgraph engine is enabled"; } #ifdef PADDLE_WITH_IPU - argument_.SetUseIpu(config_.use_ipu_); - argument_.SetIpuDeviceNum(config_.ipu_device_num()); - argument_.SetIpuMicroBatchSize(config_.ipu_micro_batch_size_); - argument_.SetIpuEnablePipelining(config_.ipu_enable_pipelining_); - argument_.SetIpuBatchesPerStep(config_.ipu_batches_per_step_); - argument_.SetIpuEnableFp16(config_.ipu_enable_fp16_); - argument_.SetIpuReplicaNum(config_.ipu_replica_num_); - argument_.SetIpuAvailableMemoryProportion( + argument_->SetUseIpu(config_.use_ipu_); + argument_->SetIpuDeviceNum(config_.ipu_device_num()); + argument_->SetIpuMicroBatchSize(config_.ipu_micro_batch_size_); + 
argument_->SetIpuEnablePipelining(config_.ipu_enable_pipelining_); + argument_->SetIpuBatchesPerStep(config_.ipu_batches_per_step_); + argument_->SetIpuEnableFp16(config_.ipu_enable_fp16_); + argument_->SetIpuReplicaNum(config_.ipu_replica_num_); + argument_->SetIpuAvailableMemoryProportion( config_.ipu_available_memory_proportion_); - argument_.SetIpuEnableHalfPartial(config_.ipu_enable_half_partial_); - argument_.SetIpuEnableModelRuntimeExecutor( + argument_->SetIpuEnableHalfPartial(config_.ipu_enable_half_partial_); + argument_->SetIpuEnableModelRuntimeExecutor( config_.ipu_enable_model_runtime_executor_); - argument_.SetIpuCustomOpsInfo(config_.ipu_custom_ops_info_); - argument_.SetIpuCustomPatterns(config_.ipu_custom_patterns_); + argument_->SetIpuCustomOpsInfo(config_.ipu_custom_ops_info_); + argument_->SetIpuCustomPatterns(config_.ipu_custom_patterns_); #endif - argument_.SetUseNpu(config_.use_npu_); - argument_.SetNPUDeviceId(config_.npu_device_id()); + argument_->SetUseNpu(config_.use_npu_); + argument_->SetNPUDeviceId(config_.npu_device_id()); if (config_.use_mkldnn_) { LOG(INFO) << "MKLDNN is enabled"; - argument_.SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_); + argument_->SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_); } if (config_.use_cinn_compiler_) { - argument_.SetUseCinnCompiler(config_.use_cinn_compiler_); + argument_->SetUseCinnCompiler(config_.use_cinn_compiler_); } #ifdef PADDLE_WITH_MKLDNN if (config_.mkldnn_quantizer_enabled()) { LOG(INFO) << "Quantization is enabled"; - argument_.SetQuantizeEnabledOpTypes( + argument_->SetQuantizeEnabledOpTypes( config_.mkldnn_quantizer_config()->enabled_op_types()); - argument_.SetQuantizeExcludedOpIds( + argument_->SetQuantizeExcludedOpIds( config_.mkldnn_quantizer_config()->excluded_op_ids()); } if (config_.use_mkldnn_bfloat16_) { LOG(INFO) << "Bfloat16 is enabled"; - argument_.SetBfloat16EnabledOpTypes(config_.bfloat16_enabled_op_types_); + argument_->SetBfloat16EnabledOpTypes(config_.bfloat16_enabled_op_types_); } if (config_.use_mkldnn_int8_) { LOG(INFO) << "Int8 is enabled"; - argument_.SetQuantizeEnabledOpTypes(config_.quantize_enabled_op_types_); - argument_.SetQuantizeExcludedOpIds(config_.quantize_excluded_op_ids_); - argument_.SetQuantVarScales({}); + argument_->SetQuantizeEnabledOpTypes(config_.quantize_enabled_op_types_); + argument_->SetQuantizeExcludedOpIds(config_.quantize_excluded_op_ids_); + argument_->SetQuantVarScales({}); } #endif #ifdef PADDLE_WITH_CUSTOM_DEVICE - argument_.SetUseCustomDevice(config_.use_custom_device()); + argument_->SetUseCustomDevice(config_.use_custom_device()); if (config_.use_custom_device()) { LOG(INFO) << "CustomDevice is enabled"; - argument_.SetCustomDeviceType(config_.custom_device_type()); - argument_.SetCustomDeviceId(config_.custom_device_id()); + argument_->SetCustomDeviceType(config_.custom_device_type()); + argument_->SetCustomDeviceId(config_.custom_device_id()); } #endif @@ -1276,9 +1278,9 @@ void AnalysisPredictor::PrepareArgument() { } if (!config_.ir_optim()) { - argument_.SetEnableIrOptim(false); + argument_->SetEnableIrOptim(false); if (config_.enable_gpu_half_) { - argument_.SetEnableIrOptim(true); + argument_->SetEnableIrOptim(true); pass_builder->ClearPasses(); pass_builder->AppendPass("float_to_half_pass"); LOG(INFO) @@ -1295,16 +1297,16 @@ void AnalysisPredictor::PrepareArgument() { LOG(INFO) << "This model run in Paddle-GPU mixed precision mode."; } } - argument_.SetDisableLogs(config_.glog_info_disabled()); - 
argument_.SetIrAnalysisPasses(pass_builder->AllPasses());
-  argument_.SetAnalysisPasses(pass_builder->AnalysisPasses());
-  argument_.SetScopeNotOwned(scope_.get());
+  argument_->SetDisableLogs(config_.glog_info_disabled());
+  argument_->SetIrAnalysisPasses(pass_builder->AllPasses());
+  argument_->SetAnalysisPasses(pass_builder->AnalysisPasses());
+  argument_->SetScopeNotOwned(scope_.get());
 
   // mixed precison.
-  argument_.SetModelPrecision(static_cast<int>(model_precision_));
-  argument_.SetMixedBlackList(config_.mixed_black_list_);
-  argument_.SetEnableGPUHalf(config_.enable_gpu_half_);
-  argument_.SetMixedPrecisionMode(static_cast<int>(
+  argument_->SetModelPrecision(static_cast<int>(model_precision_));
+  argument_->SetMixedBlackList(config_.mixed_black_list_);
+  argument_->SetEnableGPUHalf(config_.enable_gpu_half_);
+  argument_->SetMixedPrecisionMode(static_cast<int>(
       paddle::ConvertPrecision(config_.mixed_precision_mode_)));
 }
 
@@ -1321,16 +1323,16 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   }
 #endif
 
-  Analyzer().Run(&argument_);
+  Analyzer().Run(argument_.get());
 
   PADDLE_ENFORCE_EQ(
-      argument_.scope_valid(),
+      argument_->scope_valid(),
       true,
       platform::errors::InvalidArgument("The argument scope should be valid."));
   VLOG(5) << "to prepare executor";
-  ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);
+  ARGUMENT_CHECK_FIELD((argument_.get()), ir_analyzed_program);
   inference_program_.reset(
-      new framework::ProgramDesc(argument_.ir_analyzed_program()),
+      new framework::ProgramDesc(argument_->ir_analyzed_program()),
       [](framework::ProgramDesc *prog) {
 // Note, please do NOT use any member variables, because member variables may
 // have been destructed in multiple threads.
@@ -1358,8 +1360,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   });
   // The config and argument take a lot of storage,
   // when the predictor settings are complete, we release these stores.
-  argument_.PartiallyRelease();
   config_.PartiallyRelease();
+  argument_.reset(nullptr);
   LOG(INFO) << "======= optimize end =======";
 }
 
@@ -2082,9 +2084,9 @@ bool AnalysisPredictor::SaveTrtCalibToDisk() {
   }
 
   std::string model_opt_cache_dir =
-      argument_.Has("model_dir")
-          ? argument_.model_dir()
-          : inference::analysis::GetDirRoot(argument_.model_program_path());
+      argument_->Has("model_dir") ? 
argument_->model_dir()
+                : inference::analysis::GetDirRoot(
+                      argument_->model_program_path());
 
   std::string calibration_table_data_path =
       inference::analysis::GetTrtCalibPath(
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 09e1b43377cdff..0cddf3b80fc86a 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -484,7 +484,7 @@ class AnalysisPredictor : public PaddlePredictor {
 
  private:
   AnalysisConfig config_;
-  Argument argument_;
+  std::unique_ptr<Argument> argument_;
   std::unique_ptr<framework::NaiveExecutor> executor_;
   platform::Place place_;
   std::shared_ptr<framework::Scope> scope_;

From f96ff09517fe59f386ae1995399e444539e95e4f Mon Sep 17 00:00:00 2001
From: xiaoxiaoehehe001
Date: Tue, 13 Dec 2022 09:03:01 +0000
Subject: [PATCH 02/19] argument_

---
 paddle/fluid/inference/api/analysis_predictor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 0cddf3b80fc86a..3af17360c20d24 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -249,7 +249,7 @@ class AnalysisPredictor : public PaddlePredictor {
   ///
   /// \return the argument obtained by config
   ///
-  Argument &analysis_argument() { return argument_; }
+  Argument &analysis_argument() { return *argument_; }
   ///
   /// \brief Clone to get the new predictor. thread safe.
   ///

From 276c7cfaf7bf232dad2bc1e057e1cb7cdf6367ea Mon Sep 17 00:00:00 2001
From: xiaoxiaoehehe001
Date: Tue, 13 Dec 2022 13:00:06 +0000
Subject: [PATCH 03/19] fix_mkldnn

---
 .../fluid/inference/api/mkldnn_quantizer.cc   | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc
index 53ed8c8134937d..86e6b394ad76ce 100644
--- a/paddle/fluid/inference/api/mkldnn_quantizer.cc
+++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc
@@ -588,15 +588,15 @@ void AnalysisPredictor::MkldnnQuantizer::ClearDeviceContext() const {
 
 void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
   auto& arg = predictor_.argument_;
-  if (!arg.scope_valid()) arg.SetScope(new framework::Scope);
-  arg.SetMainProgramNotOwned(predictor_.inference_program_.get());
-  auto graph = std::unique_ptr<Graph>(new Graph(arg.main_program()));
-  arg.SetMainGraph(graph.release());
-  auto* scope_ptr = arg.scope_ptr();
+  if (!arg->scope_valid()) arg->SetScope(new framework::Scope);
+  arg->SetMainProgramNotOwned(predictor_.inference_program_.get());
+  auto graph = std::unique_ptr<Graph>(new Graph(arg->main_program()));
+  arg->SetMainGraph(graph.release());
+  auto* scope_ptr = arg->scope_ptr();
   PADDLE_ENFORCE_NOT_NULL(
       scope_ptr,
       platform::errors::PreconditionNotMet("The scope should not be nullptr."));
-  arg.main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
+  arg->main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
 
   auto* builder = predictor_.config_.pass_builder();
   builder->SetPasses({"cpu_quantize_pass",
@@ -605,10 +605,10 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
                       "params_quantization_mkldnn_pass"});
   if (predictor_.config_.ir_debug_) builder->TurnOnDebug();
   auto passes = builder->AllPasses();
-  predictor_.argument_.SetIrAnalysisPasses(passes);
-  predictor_.argument_.SetAnalysisPasses(
+  predictor_.argument_->SetIrAnalysisPasses(passes);
+  predictor_.argument_->SetAnalysisPasses(
{"ir_analysis_pass", "memory_optimize_pass", "ir_graph_to_program_pass"}); - predictor_.argument_.SetQuantVarScales(scales_); + predictor_.argument_->SetQuantVarScales(scales_); } bool AnalysisPredictor::MkldnnQuantizer::Quantize() { @@ -628,15 +628,15 @@ bool AnalysisPredictor::MkldnnQuantizer::RunQuantizePasses() const { *predictor_.inference_program_, 0, true, predictor_.sub_scope_); PrepareArgument(); auto& arg = predictor_.argument_; - Analyzer().Run(&arg); + Analyzer().Run(arg.get()); PADDLE_ENFORCE_EQ( - arg.scope_valid(), + arg->scope_valid(), true, platform::errors::PreconditionNotMet("The scope should be valid.")); VLOG(5) << "to prepare executor"; - ARGUMENT_CHECK_FIELD((&arg), ir_analyzed_program); + ARGUMENT_CHECK_FIELD(arg.get(), ir_analyzed_program); predictor_.inference_program_.reset( - new framework::ProgramDesc(arg.ir_analyzed_program())); + new framework::ProgramDesc(arg->ir_analyzed_program())); LOG(INFO) << "== optimize 2 end =="; predictor_.executor_->CreateVariables( *predictor_.inference_program_, 0, false, predictor_.sub_scope_); From f167a2fdaadac39f6541f72ff75c5ea444099ccf Mon Sep 17 00:00:00 2001 From: xiaoxiaoehehe001 Date: Wed, 14 Dec 2022 07:37:19 +0000 Subject: [PATCH 04/19] DeleteGetFuseStatis --- .../fluid/inference/tests/api/CMakeLists.txt | 16 ------ .../tests/api/analyzer_bert_tester.cc | 13 ----- .../tests/api/analyzer_dam_tester.cc | 12 ----- .../tests/api/analyzer_ernie_tester.cc | 26 ---------- .../analyzer_image_classification_tester.cc | 11 ----- .../tests/api/analyzer_lac_tester.cc | 16 ------ .../tests/api/analyzer_ner_tester.cc | 16 ------ .../tests/api/analyzer_pyramid_dnn_tester.cc | 11 ----- .../tests/api/analyzer_rnn1_tester.cc | 17 ------- .../tests/api/analyzer_save_model_tester.cc | 22 --------- .../tests/api/analyzer_seq_conv1_tester.cc | 15 ------ .../analyzer_seq_pool1_fuse_statis_tester.cc | 49 ------------------- .../api/analyzer_transformer_fuse_tester.cc | 36 -------------- .../tests/api/analyzer_vis_tester.cc | 9 ---- .../tests/api/analyzer_vit_ocr_tester.cc | 16 ------ .../fluid/inference/tests/api/tester_helper.h | 23 --------- tools/parallel_UT_rule.py | 4 -- 17 files changed, 312 deletions(-) delete mode 100644 paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc delete mode 100644 paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index a5cdfda3243eb4..8c8d9912a2dbde 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -318,9 +318,6 @@ if(NOT APPLE AND WITH_MKLML) inference_analysis_api_test( test_analyzer_seq_pool1_fuse_compare_zero_copy ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc) - inference_analysis_api_test( - test_analyzer_seq_pool1_fuse_statis ${SEQ_POOL1_INSTALL_DIR} - analyzer_seq_pool1_fuse_statis_tester.cc) inference_analysis_api_test( test_analyzer_seq_pool1_profile ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_profile_tester.cc) @@ -330,8 +327,6 @@ if(NOT APPLE AND WITH_MKLML) set_tests_properties(test_analyzer_seq_pool1 PROPERTIES TIMEOUT 120) set_tests_properties(test_analyzer_seq_pool1_fuse_compare_zero_copy PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_seq_pool1_fuse_statis PROPERTIES TIMEOUT - 120) set_tests_properties(test_analyzer_seq_pool1_profile PROPERTIES TIMEOUT 120) endif() else() @@ -486,17 +481,6 @@ inference_analysis_test( 
--infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8 --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) -inference_analysis_test( - test_analyzer_transformer_fuse - SRCS - analyzer_transformer_fuse_tester.cc - EXTRA_DEPS - paddle_inference_shared - ARGS - --infer_model=${TRANSFORMER_INSTALL_DIR}/model - --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt - --batch_size=8 - --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) inference_analysis_test( test_analyzer_transformer_profile SRCS diff --git a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc index e7462786c40376..252b9acd7eb977 100644 --- a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc @@ -60,19 +60,6 @@ TEST(Analyzer_bert, profile_mkldnn_bf16) { } #endif -// Check the fuse status -TEST(Analyzer_bert, fuse_statis) { -#if !defined(_WIN32) - setenv("NVIDIA_TF32_OVERRIDE", "0", 1); -#endif - auto cfg(SetConfig()); - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); - LOG(INFO) << "num_ops: " << num_ops; -} - TEST(Analyzer_bert, compare) { #if !defined(_WIN32) setenv("NVIDIA_TF32_OVERRIDE", "0", 1); diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc index 36a2dfcb715720..f50442742a2e05 100644 --- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc @@ -252,18 +252,6 @@ TEST(Analyzer_dam, profile) { profile(); } TEST(Analyzer_dam, profile_mkldnn) { profile(true /* use_mkldnn */); } #endif -// Check the fuse status -TEST(Analyzer_dam, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); - ASSERT_TRUE(fuse_statis.count("fc_fuse")); -} - // Compare result of NativeConfig and AnalysisConfig void compare(bool use_mkldnn = false) { AnalysisConfig cfg; diff --git a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc index 79a6c840ea3fcf..8d85fafe39a3fd 100644 --- a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc @@ -59,32 +59,6 @@ TEST(Analyzer_ernie, profile_gpu) { } #endif -// Check the fuse status -TEST(Analyzer_Ernie, fuse_statis) { -#if !defined(_WIN32) - setenv("NVIDIA_TF32_OVERRIDE", "0", 1); -#endif - AnalysisConfig cfg; - SetConfig(&cfg); - - auto pass_builder = cfg.pass_builder(); - pass_builder->DeletePass("constant_folding_pass"); - - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - LOG(INFO) << "num_ops: " << num_ops; - if (FLAGS_ernie_large) { - ASSERT_EQ(fuse_statis.at("fc_fuse"), 146); - EXPECT_EQ(num_ops, 859); - } else { - ASSERT_EQ(fuse_statis.at("fc_fuse"), 74); - EXPECT_EQ(num_ops, 295); - } -} - // Compare result of NativeConfig and AnalysisConfig void compare(bool use_mkldnn = false) { std::vector> inputs; diff --git a/paddle/fluid/inference/tests/api/analyzer_image_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_image_classification_tester.cc index e25c78bd2874eb..6b3e2ba7f1c207 100644 --- 
a/paddle/fluid/inference/tests/api/analyzer_image_classification_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_image_classification_tester.cc @@ -65,17 +65,6 @@ void profile(bool use_mkldnn = false) { FLAGS_num_threads); } -// Check the fuse status -TEST(Analyzer_resnet50, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); - LOG(INFO) << "num_ops: " << num_ops; -} - TEST(Analyzer_resnet50, profile) { profile(); } #ifdef PADDLE_WITH_MKLDNN TEST(Analyzer_resnet50, profile_mkldnn) { profile(true /* use_mkldnn */); } diff --git a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc index 8113b40c8879f2..1962d7207a4b5e 100644 --- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc @@ -165,22 +165,6 @@ TEST(Analyzer_LAC, profile) { } } -// Check the fuse status -TEST(Analyzer_LAC, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - ASSERT_TRUE(fuse_statis.count("fc_gru_fuse")); - EXPECT_EQ(fuse_statis.at("fc_fuse"), 1); - EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 4); - EXPECT_EQ(num_ops, 11); -} - // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_LAC, compare) { AnalysisConfig cfg; diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc index 7309c1ba5602ab..cb9827d9733189 100644 --- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc @@ -148,22 +148,6 @@ TEST(Analyzer_Chinese_ner, profile_memory_load) { profile(true /* memory_load */); } -// Check the fuse status -TEST(Analyzer_Chinese_ner, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - ASSERT_TRUE(fuse_statis.count("fc_gru_fuse")); - EXPECT_EQ(fuse_statis.at("fc_fuse"), 1); - EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 2); - EXPECT_EQ(num_ops, 14); -} - // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_Chinese_ner, compare) { AnalysisConfig cfg; diff --git a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc index 578d3c57273720..2b5da5c619771f 100644 --- a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc @@ -164,17 +164,6 @@ TEST(Analyzer_Pyramid_DNN, profile) { } } -// Check the fuse status -TEST(Analyzer_Pyramid_DNN, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); -} - // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_Pyramid_DNN, compare) { AnalysisConfig cfg; diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc index 537b5ef1e6e03e..12706788331643 100644 --- 
a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc @@ -244,23 +244,6 @@ TEST(Analyzer_rnn1, profile) { FLAGS_num_threads); } -// Check the fuse status -TEST(Analyzer_rnn1, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - EXPECT_EQ(fuse_statis.at("fc_fuse"), 1); - EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2); // bi-directional LSTM - EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1); - EXPECT_EQ(num_ops, - 13); // After graph optimization, only 13 operators exists. -} - // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_rnn1, compare) { AnalysisConfig cfg; diff --git a/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc b/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc index e0310d3bf1a858..9af32219de7c94 100644 --- a/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc @@ -24,13 +24,6 @@ void SetConfig(AnalysisConfig *cfg) { cfg->SwitchIrDebug(); } -int GetNumOps(const AnalysisConfig &cfg) { - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - GetFuseStatis(static_cast(predictor.get()), &num_ops); - return num_ops; -} - /* * this model is unreasonable, it set a output tensor persistable, so * ridiculous! so I disable constant_folding_pass @@ -48,21 +41,6 @@ TEST(Analyzer, save_model) { std::string optimModelPath = FLAGS_infer_model + "/only_for_save_model_test"; MKDIR(optimModelPath.c_str()); SaveOptimModel(&cfg, optimModelPath); - - // Each config can only be applied to one predictor. 
- AnalysisConfig cfg2; - SetConfig(&cfg2); - cfg2.pass_builder()->ClearPasses(); - cfg2.SetModel(optimModelPath + "/model", optimModelPath + "/params"); - int origin_num_ops = GetNumOps(cfg2); - - AnalysisConfig cfg3; - SetConfig(&cfg3); - auto pass_builder3 = cfg3.pass_builder(); - pass_builder3->DeletePass("constant_folding_pass"); - cfg3.SetModel(optimModelPath + "/model", optimModelPath + "/params"); - int fused_num_ops = GetNumOps(cfg3); - CHECK_LE(fused_num_ops, origin_num_ops); } } // namespace inference diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc index 9f3706edc56ccd..b396379a391f5a 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc @@ -149,21 +149,6 @@ TEST(Analyzer_seq_conv1, profile) { } } -// Check the fuse status -TEST(Analyzer_seq_conv1, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - - auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops); - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - ASSERT_TRUE(fuse_statis.count("seqconv_eltadd_relu_fuse")); - EXPECT_EQ(fuse_statis.at("fc_fuse"), 2); - EXPECT_EQ(fuse_statis.at("seqconv_eltadd_relu_fuse"), 6); - EXPECT_EQ(num_ops, 31); -} - // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_seq_conv1, compare) { AnalysisConfig cfg; diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc deleted file mode 100644 index 185b37a7cc836f..00000000000000 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include - -#include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" -#include "paddle/fluid/inference/tests/api/tester_helper.h" - -namespace paddle { -namespace inference { -namespace analysis { -namespace seq_pool1_tester { - -// Check the fuse status -TEST(Analyzer_seq_pool1_fuse_statis, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops); - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - ASSERT_TRUE(fuse_statis.count("seqpool_concat_fuse")); - ASSERT_TRUE(fuse_statis.count("squared_mat_sub_fuse")); - ASSERT_TRUE(fuse_statis.count("repeated_fc_relu_fuse")); - ASSERT_EQ(fuse_statis.at("fc_fuse"), 10); - EXPECT_EQ(fuse_statis.at("seqpool_concat_fuse"), 2); - EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 0); - EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2); - LOG(INFO) << "num_ops: " << num_ops; - EXPECT_EQ(num_ops, 181); -} - -} // namespace seq_pool1_tester -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc b/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc deleted file mode 100644 index 4e5484c9ea01df..00000000000000 --- a/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h" - -namespace paddle { -namespace inference { -namespace analysis { -namespace transformer_tester { - -// Check the fuse status -TEST(Analyzer_Transformer, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); -} - -} // namespace transformer_tester -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc index 0581eb614a416b..fe2f50104a3a65 100644 --- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc @@ -125,15 +125,6 @@ TEST(Analyzer_vis, profile) { profile(); } TEST(Analyzer_vis, profile_mkldnn) { profile(true /* use_mkldnn */); } #endif -// Check the fuse status -TEST(Analyzer_vis, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - GetFuseStatis(predictor.get(), &num_ops); -} - // Compare result of NativeConfig and AnalysisConfig void compare(bool use_mkldnn = false) { AnalysisConfig cfg; diff --git a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc index 8180d951050ce3..e726604c614dc5 100644 --- a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc @@ -92,22 +92,6 @@ TEST(Analyzer_vit_ocr, compare) { compare(); } TEST(Analyzer_vit_ocr, compare_mkldnn) { compare(true /* use_mkldnn */); } #endif -#ifdef PADDLE_WITH_MKLDNN -// Check the fuse status -TEST(Analyzer_vit_ocr, fuse_status) { - AnalysisConfig cfg; - SetConfig(&cfg, true); - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); - - CHECK_EQ(fuse_statis.at("fc_mkldnn_pass"), 33); - CHECK_EQ(fuse_statis.at("fused_conv2d_gelu_mkldnn_fuse_pass"), 2); - CHECK_EQ(fuse_statis.at("fc_elementwise_add_mkldnn_fuse"), 16); -} -#endif - } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index e412504e3eae8a..a59766873be9b7 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -368,29 +368,6 @@ std::unique_ptr CreateTestPredictor( size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); } -std::unordered_map GetFuseStatis(PaddlePredictor *predictor, - int *num_ops) { - std::unordered_map res; - auto *analysis_predictor = static_cast(predictor); - auto *fusion_status = - analysis_predictor->analysis_argument().fusion_statis_ptr(); - if (!fusion_status) { - return res; - } - for (auto &item : *fusion_status) { - LOG(INFO) << "fused " << item.first << " " << item.second; - } - int num = 0; - for (auto &node : - analysis_predictor->analysis_argument().main_graph().Nodes()) { - if (node->IsOp()) { - ++num; - } - } - *num_ops = num; - return *fusion_status; -} - void SetFakeImageInput(std::vector> *inputs, const std::string &dirname, bool is_combined = true, diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index 4e552bebb75000..edb9ccefeb3b03 100755 --- a/tools/parallel_UT_rule.py +++ 
b/tools/parallel_UT_rule.py
@@ -315,7 +315,6 @@
     'complex_test',
     'test_fleet_static_mp_layers',
     'test_aligned_allocator',
-    'test_analyzer_transformer_fuse',
     'test_sequence_topk_avg_pooling',
     'test_analyzer_lexical_gru',
     'test_broadcast_error',
@@ -717,7 +716,6 @@
     'test_static_save_load_large',
     'test_bipartite_match_op',
     'test_conv2d_layer',
-    'test_analyzer_seq_pool1_fuse_statis',
     'test_split_plugin',
     'test_analyzer_small_dam',
     'test_analyzer_capi_exp_gpu',
@@ -2152,7 +2150,6 @@
     'test_dist_fleet_ps2',
     'test_dist_fleet_grad_clip',
     'test_custom_concat',
-    'test_analyzer_seq_pool1_fuse_statis',
     'test_fleet_ps',
     'test_analyzer_multi_model_prediction',
     'test_fleet_base_3',
@@ -3174,7 +3171,6 @@
     'test_auto_parallel_graph',
     'test_auto_parallel_completion_gpt',
     'test_auto_parallel_completion',
-    'test_analyzer_transformer_fuse',
     'test_analyzer_save_model',
     'test_analyzer_lexical_gru_int8_multi_gru',
     'test_analyzer_lexical_gru_int8',

From db33ea0e660f15796ab432ffafff9895073e8ca7 Mon Sep 17 00:00:00 2001
From: xiaoxiaohehe001 <49090790+xiaoxiaohehe001@users.noreply.github.com>
Date: Wed, 14 Dec 2022 15:49:52 +0800
Subject: [PATCH 05/19] update changes

---
 .../fluid/inference/tests/api/tester_helper.h | 23 +++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index a59766873be9b7..e412504e3eae8a 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -368,6 +368,29 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
 
 size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); }
 
+std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
+                                                   int *num_ops) {
+  std::unordered_map<std::string, int> res;
+  auto *analysis_predictor = static_cast<AnalysisPredictor *>(predictor);
+  auto *fusion_status =
+      analysis_predictor->analysis_argument().fusion_statis_ptr();
+  if (!fusion_status) {
+    return res;
+  }
+  for (auto &item : *fusion_status) {
+    LOG(INFO) << "fused " << item.first << " " << item.second;
+  }
+  int num = 0;
+  for (auto &node :
+       analysis_predictor->analysis_argument().main_graph().Nodes()) {
+    if (node->IsOp()) {
+      ++num;
+    }
+  }
+  *num_ops = num;
+  return *fusion_status;
+}
+
 void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
                        const std::string &dirname,
                        bool is_combined = true,

From 0a6fe4cbefc87a5c5c0cd85b013af37eda427e13 Mon Sep 17 00:00:00 2001
From: xiaoxiaoehehe001
Date: Wed, 14 Dec 2022 07:50:56 +0000
Subject: [PATCH 06/19] DeleteGetFuseStatis

---
 .../fluid/inference/tests/api/tester_helper.h | 23 +++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index a59766873be9b7..e412504e3eae8a 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -368,6 +368,29 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
 
 size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); }
 
+std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
+                                                   int *num_ops) {
+  std::unordered_map<std::string, int> res;
+  auto *analysis_predictor = static_cast<AnalysisPredictor *>(predictor);
+  auto *fusion_status =
+      analysis_predictor->analysis_argument().fusion_statis_ptr();
+  if (!fusion_status) {
+    return res;
+  }
+  for (auto &item : *fusion_status) {
+    LOG(INFO) << "fused " << item.first << " " << item.second;
+  }
+  int num = 0;
+  for (auto &node :
+       analysis_predictor->analysis_argument().main_graph().Nodes()) {
+    if (node->IsOp()) {
+      
++num; + } + } + *num_ops = num; + return *fusion_status; +} + void SetFakeImageInput(std::vector> *inputs, const std::string &dirname, bool is_combined = true, From c895758d77a9146889249f4e1795a55bafb67e9a Mon Sep 17 00:00:00 2001 From: xiaoxiaoehehe001 Date: Thu, 15 Dec 2022 01:36:10 +0000 Subject: [PATCH 07/19] argument_ --- .../fluid/inference/tests/api/CMakeLists.txt | 16 +++++++++++ .../analyzer_seq_pool1_fuse_statis_tester.cc | 28 +++++++++++++++++++ .../api/analyzer_transformer_fuse_tester.cc | 23 +++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc create mode 100644 paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 8c8d9912a2dbde..a5cdfda3243eb4 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -318,6 +318,9 @@ if(NOT APPLE AND WITH_MKLML) inference_analysis_api_test( test_analyzer_seq_pool1_fuse_compare_zero_copy ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc) + inference_analysis_api_test( + test_analyzer_seq_pool1_fuse_statis ${SEQ_POOL1_INSTALL_DIR} + analyzer_seq_pool1_fuse_statis_tester.cc) inference_analysis_api_test( test_analyzer_seq_pool1_profile ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_profile_tester.cc) @@ -327,6 +330,8 @@ if(NOT APPLE AND WITH_MKLML) set_tests_properties(test_analyzer_seq_pool1 PROPERTIES TIMEOUT 120) set_tests_properties(test_analyzer_seq_pool1_fuse_compare_zero_copy PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_seq_pool1_fuse_statis PROPERTIES TIMEOUT + 120) set_tests_properties(test_analyzer_seq_pool1_profile PROPERTIES TIMEOUT 120) endif() else() @@ -481,6 +486,17 @@ inference_analysis_test( --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8 --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) +inference_analysis_test( + test_analyzer_transformer_fuse + SRCS + analyzer_transformer_fuse_tester.cc + EXTRA_DEPS + paddle_inference_shared + ARGS + --infer_model=${TRANSFORMER_INSTALL_DIR}/model + --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt + --batch_size=8 + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) inference_analysis_test( test_analyzer_transformer_profile SRCS diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc new file mode 100644 index 00000000000000..fa2503b637ccab --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc @@ -0,0 +1,28 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include + +#include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" +#include "paddle/fluid/inference/tests/api/tester_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { +namespace seq_pool1_tester {} // namespace seq_pool1_tester +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc b/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc new file mode 100644 index 00000000000000..fa87da71ec2314 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { +namespace transformer_tester {} // namespace transformer_tester +} // namespace analysis +} // namespace inference +} // namespace paddle From d148912e48a7485b51ea93be8b494c3102ba239a Mon Sep 17 00:00:00 2001 From: xiaoxiaoehehe001 Date: Thu, 15 Dec 2022 01:41:56 +0000 Subject: [PATCH 08/19] argument_ --- tools/parallel_UT_rule.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index b3e83390f6da4d..2225ae33021d84 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -313,6 +313,7 @@ 'complex_test', 'test_fleet_static_mp_layers', 'test_aligned_allocator', + 'test_analyzer_transformer_fuse', 'test_sequence_topk_avg_pooling', 'test_analyzer_lexical_gru', 'test_broadcast_error', @@ -714,6 +715,7 @@ 'test_static_save_load_large', 'test_bipartite_match_op', 'test_conv2d_layer', + 'test_analyzer_seq_pool1_fuse_statis', 'test_split_plugin', 'test_analyzer_small_dam', 'test_analyzer_capi_exp_gpu', @@ -2145,6 +2147,7 @@ 'test_dist_fleet_ps2', 'test_dist_fleet_grad_clip', 'test_custom_concat', + 'test_analyzer_seq_pool1_fuse_statis', 'test_fleet_ps', 'test_analyzer_multi_model_prediction', 'test_fleet_base_3', @@ -3166,6 +3169,7 @@ 'test_auto_parallel_graph', 'test_auto_parallel_completion_gpt', 'test_auto_parallel_completion', + 'test_analyzer_transformer_fuse', 'test_analyzer_save_model', 'test_analyzer_lexical_gru_int8_multi_gru', 'test_analyzer_lexical_gru_int8', From 98e61ec8bdced48ce0b099428cf6dadc61a7f03b Mon Sep 17 00:00:00 2001 From: xiaoxiaoehehe001 Date: Thu, 15 Dec 2022 03:39:41 +0000 Subject: [PATCH 09/19] argument_ --- paddle/fluid/inference/api/analysis_predictor.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 691ab9e366ec04..1ea8831b6dd6dc 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1360,8 +1360,14 @@ void 
AnalysisPredictor::OptimizeInferenceProgram() { }); // The config and argument take a lot of storage, // when the predictor settings are complete, we release these stores. - config_.PartiallyRelease(); + +#if defined(PADDLE_WITH_MKLDNN) || defined(_WIN32) + argument_->PartiallyRelease(); +#else argument_.reset(nullptr); +#endif + + config_.PartiallyRelease(); LOG(INFO) << "======= optimize end ======="; } From acbeb9c8f18b411b30e94fb43cf36f2241d7f9cc Mon Sep 17 00:00:00 2001 From: xiaoxiaoehehe001 Date: Thu, 15 Dec 2022 03:57:35 +0000 Subject: [PATCH 10/19] DeleteGetFuseStatis --- paddle/fluid/inference/api/analysis_predictor.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 1ea8831b6dd6dc..70ac06ea2df7bc 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1361,10 +1361,18 @@ void AnalysisPredictor::OptimizeInferenceProgram() { // The config and argument take a lot of storage, // when the predictor settings are complete, we release these stores. -#if defined(PADDLE_WITH_MKLDNN) || defined(_WIN32) +#if defined(_WIN32) argument_->PartiallyRelease(); +#else +#if defined(PADDLE_WITH_MKLDNN) + if (argument_->use_mkldnn_int8()) { + argument_->PartiallyRelease(); + } else { + argument_.reset(nullptr); + } #else argument_.reset(nullptr); +#endif #endif config_.PartiallyRelease(); From 0cd840e7e0fda6a4dc60eaa7747d4370f00ff051 Mon Sep 17 00:00:00 2001 From: xiaoxiaoehehe001 Date: Thu, 15 Dec 2022 04:52:31 +0000 Subject: [PATCH 11/19] argument_ --- paddle/fluid/inference/api/analysis_predictor.cc | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 70ac06ea2df7bc..cbb555719d57a1 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1360,22 +1360,16 @@ void AnalysisPredictor::OptimizeInferenceProgram() { }); // The config and argument take a lot of storage, // when the predictor settings are complete, we release these stores. 
- + config_.PartiallyRelease(); #if defined(_WIN32) argument_->PartiallyRelease(); #else -#if defined(PADDLE_WITH_MKLDNN) - if (argument_->use_mkldnn_int8()) { + if (config_.mkldnn_enabled()) { argument_->PartiallyRelease(); } else { argument_.reset(nullptr); } -#else - argument_.reset(nullptr); #endif -#endif - - config_.PartiallyRelease(); LOG(INFO) << "======= optimize end ======="; } From 2e4a148e4764c2daff579f542e176c1a2fb81510 Mon Sep 17 00:00:00 2001 From: xiaoxiaoehehe001 Date: Fri, 16 Dec 2022 05:50:10 +0000 Subject: [PATCH 12/19] argument_ --- .../fluid/inference/api/analysis_predictor.cc | 1 + .../fluid/inference/api/analysis_predictor.h | 8 +++++++ .../tests/api/analyzer_bert_tester.cc | 13 ++++++++++ .../tests/api/analyzer_dam_tester.cc | 12 ++++++++++ .../tests/api/analyzer_ernie_tester.cc | 24 +++++++++++++++++++ .../analyzer_image_classification_tester.cc | 11 +++++++++ .../tests/api/analyzer_lac_tester.cc | 15 ++++++++++++ .../tests/api/analyzer_ner_tester.cc | 15 ++++++++++++ .../tests/api/analyzer_pyramid_dnn_tester.cc | 11 +++++++++ .../tests/api/analyzer_rnn1_tester.cc | 15 ++++++++++++ .../tests/api/analyzer_seq_conv1_tester.cc | 14 +++++++++++ .../analyzer_seq_pool1_fuse_statis_tester.cc | 22 ++++++++++++++++- .../api/analyzer_transformer_fuse_tester.cc | 15 +++++++++++- .../tests/api/analyzer_vis_tester.cc | 9 +++++++ .../tests/api/analyzer_vit_ocr_tester.cc | 16 +++++++++++++ .../fluid/inference/tests/api/tester_helper.h | 20 +++++----------- 16 files changed, 205 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index cbb555719d57a1..7ee932ab0c24b8 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1367,6 +1367,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() { if (config_.mkldnn_enabled()) { argument_->PartiallyRelease(); } else { + fusion_statis_ = *argument_->fusion_statis_ptr(); argument_.reset(nullptr); } #endif diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 3af17360c20d24..8decb802887b85 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -276,6 +276,13 @@ class AnalysisPredictor : public PaddlePredictor { /// std::string GetSerializedProgram() const override; + /// + /// \brief Get the fusion_statis_t + /// + /// \return the fusion_statis_t + /// + Argument::fusion_statis_t fusion_statis() { return fusion_statis_; } + /// /// \brief Register a output hook function to operate the intermediate tensor /// of op output. when using this function, memory reuse should be tured off. 
@@ -485,6 +492,7 @@ class AnalysisPredictor : public PaddlePredictor {
  private:
   AnalysisConfig config_;
   std::unique_ptr<Argument> argument_;
+  Argument::fusion_statis_t fusion_statis_;
   std::unique_ptr<framework::NaiveExecutor> executor_;
   platform::Place place_;
   std::shared_ptr<framework::Scope> scope_;
diff --git a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
index 252b9acd7eb977..e7462786c40376 100644
--- a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
@@ -60,6 +60,19 @@ TEST(Analyzer_bert, profile_mkldnn_bf16) {
 }
 #endif
 
+// Check the fuse status
+TEST(Analyzer_bert, fuse_statis) {
+#if !defined(_WIN32)
+  setenv("NVIDIA_TF32_OVERRIDE", "0", 1);
+#endif
+  auto cfg(SetConfig());
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  LOG(INFO) << "num_ops: " << num_ops;
+}
+
 TEST(Analyzer_bert, compare) {
 #if !defined(_WIN32)
   setenv("NVIDIA_TF32_OVERRIDE", "0", 1);
diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
index f50442742a2e05..36a2dfcb715720 100644
--- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
@@ -252,6 +252,18 @@ TEST(Analyzer_dam, profile) { profile(); }
 TEST(Analyzer_dam, profile_mkldnn) { profile(true /* use_mkldnn */); }
 #endif
 
+// Check the fuse status
+TEST(Analyzer_dam, fuse_statis) {
+  AnalysisConfig cfg;
+  
+  SetConfig(&cfg);
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  LOG(INFO) << "num_ops: " << num_ops;
+}
+
 TEST(Analyzer_resnet50, profile) { profile(); }
 #ifdef PADDLE_WITH_MKLDNN
 TEST(Analyzer_resnet50, profile_mkldnn) { profile(true /* use_mkldnn */); }
diff --git a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
index 1962d7207a4b5e..1bbd4d14173c34 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
@@ -165,6 +165,21 @@ TEST(Analyzer_LAC, profile) {
   }
 }
 
+// Check the fuse status
+TEST(Analyzer_LAC, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
+  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
+  EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 4);
+}
+
 // Compare result of NativeConfig and AnalysisConfig
 TEST(Analyzer_LAC, compare) {
   AnalysisConfig cfg;
diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
index cb9827d9733189..191b883a5d5d08 100644
--- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
@@ -148,6 +148,21 @@ TEST(Analyzer_Chinese_ner, profile_memory_load) {
   profile(true /* memory_load */);
 }
 
+// Check the fuse status
+TEST(Analyzer_Chinese_ner, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
+  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
+  EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 2);
+}
+
 // Compare result of NativeConfig and AnalysisConfig
 TEST(Analyzer_Chinese_ner, compare) {
   AnalysisConfig cfg;
diff --git a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
index 2b5da5c619771f..578d3c57273720 100644
--- a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
@@ -164,6 +164,17 @@ TEST(Analyzer_Pyramid_DNN, profile) {
   }
 }
 
+// Check the fuse status
+TEST(Analyzer_Pyramid_DNN, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+}
+
 // Compare result of NativeConfig and AnalysisConfig
 TEST(Analyzer_Pyramid_DNN, compare) {
   AnalysisConfig cfg;
diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
index 12706788331643..2317d02331fadd 100644
--- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -244,6 +244,21 @@ TEST(Analyzer_rnn1, profile) {
                  FLAGS_num_threads);
 }
 
+// Check the fuse status
+TEST(Analyzer_rnn1, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
+  EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2);  // bi-directional LSTM
+  EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1);
+}
+
 // Compare result of NativeConfig and AnalysisConfig
 TEST(Analyzer_rnn1, compare) {
   AnalysisConfig cfg;
diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
index b396379a391f5a..f5a583343ac54c 100644
--- a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
@@ -149,6 +149,20 @@ TEST(Analyzer_seq_conv1, profile) {
   }
 }
 
+// Check the fuse status
+TEST(Analyzer_seq_conv1, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+
+  auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  ASSERT_TRUE(fuse_statis.count("seqconv_eltadd_relu_fuse"));
+  EXPECT_EQ(fuse_statis.at("fc_fuse"), 2);
+  EXPECT_EQ(fuse_statis.at("seqconv_eltadd_relu_fuse"), 6);
+}
+
 // Compare result of NativeConfig and AnalysisConfig
 TEST(Analyzer_seq_conv1, compare) {
   AnalysisConfig cfg;
diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc
index fa2503b637ccab..e72d8484eed0f7 100644
--- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc
@@ -22,7 +22,27 @@ limitations under the License. */
 namespace paddle {
 namespace inference {
 namespace analysis {
-namespace seq_pool1_tester {}  // namespace seq_pool1_tester
+namespace seq_pool1_tester {
+
+// Check the fuse status
+TEST(Analyzer_seq_pool1_fuse_statis, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  ASSERT_TRUE(fuse_statis.count("seqpool_concat_fuse"));
+  ASSERT_TRUE(fuse_statis.count("squared_mat_sub_fuse"));
+  ASSERT_TRUE(fuse_statis.count("repeated_fc_relu_fuse"));
+  ASSERT_EQ(fuse_statis.at("fc_fuse"), 10);
+  EXPECT_EQ(fuse_statis.at("seqpool_concat_fuse"), 2);
+  EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 0);
+  EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2);
+  LOG(INFO) << "num_ops: " << num_ops;
+}
+
+}  // namespace seq_pool1_tester
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc b/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc
index fa87da71ec2314..4e5484c9ea01df 100644
--- a/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc
@@ -17,7 +17,20 @@
 namespace paddle {
 namespace inference {
 namespace analysis {
-namespace transformer_tester {}  // namespace transformer_tester
+namespace transformer_tester {
+
+// Check the fuse status
+TEST(Analyzer_Transformer, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+}
+
+}  // namespace transformer_tester
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
index fe2f50104a3a65..0581eb614a416b 100644
--- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
@@ -125,6 +125,15 @@ TEST(Analyzer_vis, profile) { profile(); }
 TEST(Analyzer_vis, profile_mkldnn) { profile(true /* use_mkldnn */); }
 #endif
 
+// Check the fuse status
+TEST(Analyzer_vis, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  GetFuseStatis(predictor.get(), &num_ops);
+}
+
 // Compare result of NativeConfig and AnalysisConfig
 void compare(bool use_mkldnn = false) {
   AnalysisConfig cfg;
diff --git a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
index e726604c614dc5..3870fde8b533a1 100644
--- a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
@@ -92,6 +92,22 @@ TEST(Analyzer_vit_ocr, compare) { compare(); }
 TEST(Analyzer_vit_ocr, compare_mkldnn) { compare(true /* use_mkldnn */); }
 #endif
 
+#ifdef PADDLE_WITH_MKLDNN
+// Check the fuse status
+TEST(Analyzer_vit_ocr, fuse_status) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg, true);
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+
+  CHECK_EQ(fuse_statis.at("fc_mkldnn_pass"), 33);
+  CHECK_EQ(fuse_statis.at("conv2d_gelu_mkldnn_fuse_pass"), 2);
+  CHECK_EQ(fuse_statis.at("fc_elementwise_add_mkldnn_fuse"), 16);
+}
+#endif
+
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index e412504e3eae8a..2108210006d747 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -372,23 +372,15 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
                                                    int *num_ops) {
   std::unordered_map<std::string, int> res;
   auto *analysis_predictor = static_cast<AnalysisPredictor *>(predictor);
-  auto *fusion_status =
-      analysis_predictor->analysis_argument().fusion_statis_ptr();
-  if (!fusion_status) {
-    return res;
+  auto fusion_status = analysis_predictor->fusion_statis();
+  if (fusion_status.empty()) {
+    fusion_status = res;
   }
-  for (auto &item : *fusion_status) {
+  for (auto &item : fusion_status) {
     LOG(INFO) << "fused " << item.first << " " << item.second;
   }
-  int num = 0;
-  for (auto &node :
-       analysis_predictor->analysis_argument().main_graph().Nodes()) {
-    if (node->IsOp()) {
-      ++num;
-    }
-  }
-  *num_ops = num;
-  return *fusion_status;
+  *num_ops = 0;
+  return fusion_status;
 }
 
 void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,

From be878a3c730d0355d6bcacd5d6003587db09d25b Mon Sep 17 00:00:00 2001
From: xiaoxiaoehehe001
Date: Fri, 16 Dec 2022 05:54:32 +0000
Subject: [PATCH 13/19] argument_

---
 .../tests/api/analyzer_save_model_tester.cc         | 12 ++++++++++++
 .../inference/tests/api/analyzer_vit_ocr_tester.cc  |  2 +-
 .../inference/tests/api/trt_split_converter_test.cc |  2 +-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc b/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
index 9af32219de7c94..915e8ca5e7445f 100644
--- a/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
@@ -41,6 +41,18 @@ TEST(Analyzer, save_model) {
   std::string optimModelPath = FLAGS_infer_model + "/only_for_save_model_test";
   MKDIR(optimModelPath.c_str());
   SaveOptimModel(&cfg, optimModelPath);
+
+  // Each config can only be applied to one predictor.
+  AnalysisConfig cfg2;
+  SetConfig(&cfg2);
+  cfg2.pass_builder()->ClearPasses();
+  cfg2.SetModel(optimModelPath + "/model", optimModelPath + "/params");
+
+  AnalysisConfig cfg3;
+  SetConfig(&cfg3);
+  auto pass_builder3 = cfg3.pass_builder();
+  pass_builder3->DeletePass("constant_folding_pass");
+  cfg3.SetModel(optimModelPath + "/model", optimModelPath + "/params");
 }
 
 } // namespace inference
diff --git a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
index 3870fde8b533a1..8180d951050ce3 100644
--- a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
@@ -103,7 +103,7 @@ TEST(Analyzer_vit_ocr, fuse_status) {
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
 
   CHECK_EQ(fuse_statis.at("fc_mkldnn_pass"), 33);
-  CHECK_EQ(fuse_statis.at("conv2d_gelu_mkldnn_fuse_pass"), 2);
+  CHECK_EQ(fuse_statis.at("fused_conv2d_gelu_mkldnn_fuse_pass"), 2);
   CHECK_EQ(fuse_statis.at("fc_elementwise_add_mkldnn_fuse"), 16);
 }
 #endif
diff --git a/paddle/fluid/inference/tests/api/trt_split_converter_test.cc b/paddle/fluid/inference/tests/api/trt_split_converter_test.cc
index 19c6103bf7aece..4a9c454272dfb3 100644
--- a/paddle/fluid/inference/tests/api/trt_split_converter_test.cc
+++ b/paddle/fluid/inference/tests/api/trt_split_converter_test.cc
@@ -32,7 +32,7 @@ TEST(TensorRT, split_converter) {
   config.SetModel(model_dir);
   config.SwitchUseFeedFetchOps(false);
   config.EnableTensorRtEngine(
-      1 << 20, batch_size, 1, AnalysisConfig::Precision::kInt8, false, true);
+      1 << 20, batch_size, 1, AnalysisConfig::Precision::kHalf, false, false);
   auto predictor = CreatePaddlePredictor(config);

From 348b5cda3bbfc7601c64f5926cbc53bcf2b40615 Mon Sep 17 00:00:00 2001
From: xiaoxiaoehehe001
Date: Fri, 16 Dec 2022 07:03:05 +0000
Subject: [PATCH 14/19] argument_

---
 .../tests/api/analyzer_vit_ocr_tester.cc | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
index 8180d951050ce3..e726604c614dc5 100644
--- a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
@@ -92,22 +92,6 @@ TEST(Analyzer_vit_ocr, compare) { compare(); }
 TEST(Analyzer_vit_ocr, compare_mkldnn) { compare(true /* use_mkldnn */); }
 #endif
 
-#ifdef PADDLE_WITH_MKLDNN
-// Check the fuse status
-TEST(Analyzer_vit_ocr, fuse_status) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg, true);
-  int num_ops;
-  auto predictor = CreatePaddlePredictor(cfg);
-  auto fuse_statis = GetFuseStatis(
-      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
-
-  CHECK_EQ(fuse_statis.at("fc_mkldnn_pass"), 33);
-  CHECK_EQ(fuse_statis.at("fused_conv2d_gelu_mkldnn_fuse_pass"), 2);
-  CHECK_EQ(fuse_statis.at("fc_elementwise_add_mkldnn_fuse"), 16);
-}
-#endif
-
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle

From b986f18b0d935befb705f03a080c89286e5d25b2 Mon Sep 17 00:00:00 2001
From: xiaoxiaoehehe001
Date: Fri, 16 Dec 2022 07:16:53 +0000
Subject: [PATCH 15/19] argument_

---
 paddle/fluid/inference/tests/api/analyzer_dam_tester.cc    | 1 -
 paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc  | 7 -------
 paddle/fluid/inference/tests/api/analyzer_lac_tester.cc    | 4 ----
 paddle/fluid/inference/tests/api/analyzer_ner_tester.cc    | 4 ----
 .../fluid/inference/tests/api/analyzer_seq_conv1_tester.cc | 4 ----
 5 files changed, 20 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
index 36a2dfcb715720..c9bcbd654551a8 100644
--- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
@@ -261,7 +261,6 @@ TEST(Analyzer_dam, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
-  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
 }
 
 // Compare result of NativeConfig and AnalysisConfig
diff --git a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
index 8159633f514eac..bbb89bffac0c69 100644
--- a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
@@ -74,13 +74,6 @@ TEST(Analyzer_Ernie, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
-  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
-  LOG(INFO) << "num_ops: " << num_ops;
-  if (FLAGS_ernie_large) {
-    ASSERT_EQ(fuse_statis.at("fc_fuse"), 146);
-  } else {
-    ASSERT_EQ(fuse_statis.at("fc_fuse"), 74);
-  }
 }
 
 // Compare result of NativeConfig and AnalysisConfig
diff --git a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
index 1bbd4d14173c34..4edf3c8c198241 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
@@ -174,10 +174,6 @@ TEST(Analyzer_LAC, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
-  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
-  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
-  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
-  EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 4);
 }
 
 // Compare result of NativeConfig and AnalysisConfig
diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
index 191b883a5d5d08..090657f83462c7 100644
--- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
@@ -157,10 +157,6 @@ TEST(Analyzer_Chinese_ner, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
-  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
-  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
-  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
-  EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 2);
 }
 
 // Compare result of NativeConfig and AnalysisConfig
diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
index f5a583343ac54c..761086be2e5769 100644
--- a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
@@ -157,10 +157,6 @@ TEST(Analyzer_seq_conv1, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
 
   auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
-  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
-  ASSERT_TRUE(fuse_statis.count("seqconv_eltadd_relu_fuse"));
-  EXPECT_EQ(fuse_statis.at("fc_fuse"), 2);
-  EXPECT_EQ(fuse_statis.at("seqconv_eltadd_relu_fuse"), 6);
 }
 
 // Compare result of NativeConfig and AnalysisConfig

From 9e10cfae948cc6c2851dd471212d027f9aceab48 Mon Sep 17 00:00:00 2001
From: xiaoxiaoehehe001
Date: Fri, 16 Dec 2022 07:18:34 +0000
Subject: [PATCH 16/19] argument_

---
 .../inference/tests/api/analyzer_vit_ocr_tester.cc | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
index e726604c614dc5..44a9ea6de91cb4 100644
--- a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
@@ -92,6 +92,18 @@ TEST(Analyzer_vit_ocr, compare) { compare(); }
 TEST(Analyzer_vit_ocr, compare_mkldnn) { compare(true /* use_mkldnn */); }
 #endif
 
+#ifdef PADDLE_WITH_MKLDNN
+// Check the fuse status
+TEST(Analyzer_vit_ocr, fuse_status) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg, true);
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+}
+#endif
+
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle

From 89030cc0881529e82d1e95ba6540dfa3b3b03ec0 Mon Sep 17 00:00:00 2001
From: xiaoxiaoehehe001
Date: Fri, 16 Dec 2022 08:35:45 +0000
Subject: [PATCH 17/19] argument_

---
 paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc | 4 ----
 .../tests/api/analyzer_seq_pool1_fuse_statis_tester.cc   | 8 --------
 2 files changed, 12 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
index 2317d02331fadd..65ffa82115585d 100644
--- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -253,10 +253,6 @@ TEST(Analyzer_rnn1, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
-  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
-  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
-  EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2);  // bi-directional LSTM
-  EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1);
 }
 
 // Compare result of NativeConfig and AnalysisConfig
diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc
index e72d8484eed0f7..c2b2baf241088f 100644
--- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc
@@ -31,14 +31,6 @@ TEST(Analyzer_seq_pool1_fuse_statis, fuse_statis) {
   int num_ops;
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
-  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
-  ASSERT_TRUE(fuse_statis.count("seqpool_concat_fuse"));
-  ASSERT_TRUE(fuse_statis.count("squared_mat_sub_fuse"));
-  ASSERT_TRUE(fuse_statis.count("repeated_fc_relu_fuse"));
-  ASSERT_EQ(fuse_statis.at("fc_fuse"), 10);
-  EXPECT_EQ(fuse_statis.at("seqpool_concat_fuse"), 2);
-  EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 0);
-  EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2);
   LOG(INFO) << "num_ops: " << num_ops;
 }
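Taken together, PATCH 15-17 strip the hard-coded fuse counts, so by this point
every fuse_statis tester has collapsed to the same smoke-test shape; an
illustrative composite is shown below, where Analyzer_xxx stands for any of
the suites touched above (PATCH 19 later restores the assertions):

    // Check the fuse status
    TEST(Analyzer_xxx, fuse_statis) {
      AnalysisConfig cfg;
      SetConfig(&cfg);
      int num_ops;
      auto predictor = CreatePaddlePredictor(cfg);
      auto fuse_statis = GetFuseStatis(
          static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
    }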
From 44363734e829c12c66768241820181881ce87646 Mon Sep 17 00:00:00 2001
From: xiaoxiaoehehe001
Date: Fri, 16 Dec 2022 16:57:06 +0000
Subject: [PATCH 18/19] argument_

---
 paddle/fluid/inference/api/analysis_predictor.cc             | 2 +-
 paddle/fluid/inference/tests/api/trt_split_converter_test.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 7ee932ab0c24b8..96fee3ca94193e 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1364,7 +1364,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
 #if defined(_WIN32)
   argument_->PartiallyRelease();
 #else
-  if (config_.mkldnn_enabled()) {
+  if (config_.mkldnn_enabled() || config_.tensorrt_engine_enabled()) {
     argument_->PartiallyRelease();
   } else {
     fusion_statis_ = *argument_->fusion_statis_ptr();
diff --git a/paddle/fluid/inference/tests/api/trt_split_converter_test.cc b/paddle/fluid/inference/tests/api/trt_split_converter_test.cc
index 4a9c454272dfb3..19c6103bf7aece 100644
--- a/paddle/fluid/inference/tests/api/trt_split_converter_test.cc
+++ b/paddle/fluid/inference/tests/api/trt_split_converter_test.cc
@@ -32,7 +32,7 @@ TEST(TensorRT, split_converter) {
   config.SetModel(model_dir);
   config.SwitchUseFeedFetchOps(false);
   config.EnableTensorRtEngine(
-      1 << 20, batch_size, 1, AnalysisConfig::Precision::kHalf, false, false);
+      1 << 20, batch_size, 1, AnalysisConfig::Precision::kInt8, false, true);
   auto predictor = CreatePaddlePredictor(config);

From 2115f53d74f5fa4fd58bf2c6378ae765c2bd0706 Mon Sep 17 00:00:00 2001
From: xiaoxiaoehehe001
Date: Sat, 17 Dec 2022 03:52:31 +0000
Subject: [PATCH 19/19] argument_

---
 paddle/fluid/inference/api/analysis_predictor.cc       |  2 +-
 .../fluid/inference/tests/api/analyzer_dam_tester.cc   |  1 +
 .../fluid/inference/tests/api/analyzer_ernie_tester.cc |  7 +++++++
 .../fluid/inference/tests/api/analyzer_lac_tester.cc   |  4 ++++
 .../fluid/inference/tests/api/analyzer_ner_tester.cc   |  4 ++++
 .../fluid/inference/tests/api/analyzer_rnn1_tester.cc  |  4 ++++
 .../inference/tests/api/analyzer_save_model_tester.cc  | 10 ++++++++++
 .../inference/tests/api/analyzer_seq_conv1_tester.cc   |  4 ++++
 .../tests/api/analyzer_seq_pool1_fuse_statis_tester.cc |  8 ++++++++
 .../inference/tests/api/analyzer_vit_ocr_tester.cc     |  4 ++++
 10 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 96fee3ca94193e..8461224ec52176 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1361,13 +1361,13 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   // The config and argument take a lot of storage; once the predictor
   // settings are complete, we release these stores.
   config_.PartiallyRelease();
+  fusion_statis_ = *argument_->fusion_statis_ptr();
 #if defined(_WIN32)
   argument_->PartiallyRelease();
 #else
   if (config_.mkldnn_enabled() || config_.tensorrt_engine_enabled()) {
     argument_->PartiallyRelease();
   } else {
-    fusion_statis_ = *argument_->fusion_statis_ptr();
     argument_.reset(nullptr);
   }
 #endif
diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
index c9bcbd654551a8..36a2dfcb715720 100644
--- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
@@ -261,6 +261,7 @@ TEST(Analyzer_dam, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
 }
 
 // Compare result of NativeConfig and AnalysisConfig
diff --git a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
index bbb89bffac0c69..8159633f514eac 100644
--- a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
@@ -74,6 +74,13 @@ TEST(Analyzer_Ernie, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  LOG(INFO) << "num_ops: " << num_ops;
+  if (FLAGS_ernie_large) {
+    ASSERT_EQ(fuse_statis.at("fc_fuse"), 146);
+  } else {
+    ASSERT_EQ(fuse_statis.at("fc_fuse"), 74);
+  }
 }
 
 // Compare result of NativeConfig and AnalysisConfig
diff --git a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
index 4edf3c8c198241..1bbd4d14173c34 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
@@ -174,6 +174,10 @@ TEST(Analyzer_LAC, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
+  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
+  EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 4);
 }
 
 // Compare result of NativeConfig and AnalysisConfig
diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
index 090657f83462c7..191b883a5d5d08 100644
--- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
@@ -157,6 +157,10 @@ TEST(Analyzer_Chinese_ner, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
+  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
+  EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 2);
 }
 
 // Compare result of NativeConfig and AnalysisConfig
diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
index 65ffa82115585d..2317d02331fadd 100644
--- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -253,6 +253,10 @@ TEST(Analyzer_rnn1, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
+  EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2);  // bi-directional LSTM
+  EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1);
 }
 
 // Compare result of NativeConfig and AnalysisConfig
diff --git a/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc b/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
index 915e8ca5e7445f..e0310d3bf1a858 100644
--- a/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
@@ -24,6 +24,13 @@ void SetConfig(AnalysisConfig *cfg) {
   cfg->SwitchIrDebug();
 }
 
+int GetNumOps(const AnalysisConfig &cfg) {
+  int num_ops;
+  auto predictor = CreatePaddlePredictor(cfg);
+  GetFuseStatis(static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  return num_ops;
+}
+
 /*
  * This model is unreasonable: it sets an output tensor persistable, which is
  * ridiculous, so constant_folding_pass is disabled here.
 */
@@ -47,12 +54,15 @@ TEST(Analyzer, save_model) {
   SetConfig(&cfg2);
   cfg2.pass_builder()->ClearPasses();
   cfg2.SetModel(optimModelPath + "/model", optimModelPath + "/params");
+  int origin_num_ops = GetNumOps(cfg2);
 
   AnalysisConfig cfg3;
   SetConfig(&cfg3);
   auto pass_builder3 = cfg3.pass_builder();
   pass_builder3->DeletePass("constant_folding_pass");
   cfg3.SetModel(optimModelPath + "/model", optimModelPath + "/params");
+  int fused_num_ops = GetNumOps(cfg3);
+  CHECK_LE(fused_num_ops, origin_num_ops);
 }
 
 } // namespace inference
diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
index 761086be2e5769..f5a583343ac54c 100644
--- a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
@@ -157,6 +157,10 @@ TEST(Analyzer_seq_conv1, fuse_statis) {
   auto predictor = CreatePaddlePredictor(cfg);
 
   auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  ASSERT_TRUE(fuse_statis.count("seqconv_eltadd_relu_fuse"));
+  EXPECT_EQ(fuse_statis.at("fc_fuse"), 2);
+  EXPECT_EQ(fuse_statis.at("seqconv_eltadd_relu_fuse"), 6);
 }
 
 // Compare result of NativeConfig and AnalysisConfig
diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc
index c2b2baf241088f..e72d8484eed0f7 100644
--- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc
@@ -31,6 +31,14 @@ TEST(Analyzer_seq_pool1_fuse_statis, fuse_statis) {
   int num_ops;
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  ASSERT_TRUE(fuse_statis.count("seqpool_concat_fuse"));
+  ASSERT_TRUE(fuse_statis.count("squared_mat_sub_fuse"));
+  ASSERT_TRUE(fuse_statis.count("repeated_fc_relu_fuse"));
+  ASSERT_EQ(fuse_statis.at("fc_fuse"), 10);
+  EXPECT_EQ(fuse_statis.at("seqpool_concat_fuse"), 2);
+  EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 0);
+  EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2);
   LOG(INFO) << "num_ops: " << num_ops;
 }
diff --git a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
index 44a9ea6de91cb4..8180d951050ce3 100644
--- a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
@@ -101,6 +101,10 @@ TEST(Analyzer_vit_ocr, fuse_status) {
   auto predictor = CreatePaddlePredictor(cfg);
   auto fuse_statis = GetFuseStatis(
       static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+
+  CHECK_EQ(fuse_statis.at("fc_mkldnn_pass"), 33);
+  CHECK_EQ(fuse_statis.at("fused_conv2d_gelu_mkldnn_fuse_pass"), 2);
+  CHECK_EQ(fuse_statis.at("fc_elementwise_add_mkldnn_fuse"), 16);
 }
 #endif
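For reference, the release path in OptimizeInferenceProgram() as it stands
after PATCH 19, reassembled from the hunks above as an illustrative sketch
(unrelated lines elided):

    config_.PartiallyRelease();
    // Cache the fuse statistics unconditionally, before the Argument can be
    // freed, so fusion_statis() remains valid on every platform and branch.
    fusion_statis_ = *argument_->fusion_statis_ptr();
    #if defined(_WIN32)
    argument_->PartiallyRelease();
    #else
    if (config_.mkldnn_enabled() || config_.tensorrt_engine_enabled()) {
      argument_->PartiallyRelease();
    } else {
      argument_.reset(nullptr);
    }
    #endif
    LOG(INFO) << "======= optimize end =======";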