Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[StableDiffusion] Add trt backend for sd model #796

Merged
merged 5 commits on Dec 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions examples/multimodal/stable_diffusion/cpp/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# StableDiffusion C++部署示例

在部署前,需确认以下两个步骤

- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)

本目录下提供`*_infer.cc`快速完成StableDiffusion各任务的C++部署示例。

## Inpaint任务

StableDiffusion Inpaint任务是一个根据提示文本补全图片的任务,具体而言就是用户给定提示文本,原始图片以及原始图片的mask图片,该任务输出补全后的图片。
160 changes: 133 additions & 27 deletions examples/multimodal/stable_diffusion/cpp/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "dpm_solver_multistep_scheduler.h"
#include "fastdeploy/vision/common/processors/mat.h"
#include "./dpm_solver_multistep_scheduler.h"
#include "./pipeline_stable_diffusion_inpaint.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/common/processors/mat.h"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "pipeline_stable_diffusion_inpaint.h"
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>

#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif

template <typename T> std::string Str(const T* value, int size) {
std::ostringstream oss;
Expand All @@ -33,17 +40,40 @@ template <typename T> std::string Str(const T* value, int size) {
return oss.str();
}

std::unique_ptr<fastdeploy::Runtime>
CreateRuntime(const std::string& model_file, const std::string& params_file,
bool use_paddle_backend = true) {
std::unique_ptr<fastdeploy::Runtime> CreateRuntime(
const std::string& model_file, const std::string& params_file,
bool use_trt_backend = false, bool use_fp16 = false,
const std::unordered_map<std::string, std::vector<std::vector<int>>>&
dynamic_shapes = {},
const std::vector<std::string>& disable_paddle_trt_ops = {}) {
fastdeploy::RuntimeOption runtime_option;
runtime_option.SetModelPath(model_file, params_file,
fastdeploy::ModelFormat::PADDLE);
runtime_option.UseGpu();
if (use_paddle_backend) {
if (!use_trt_backend) {
runtime_option.UsePaddleBackend();
} else {
runtime_option.UseOrtBackend();
runtime_option.UseTrtBackend();
runtime_option.EnablePaddleToTrt();
for (auto it = dynamic_shapes.begin(); it != dynamic_shapes.end(); ++it) {
if (it->second.size() != 3) {
std::cerr << "The size of dynamic_shapes of input `" << it->first
<< "` should be 3, but receive " << it->second.size()
<< std::endl;
continue;
}
std::vector<int> min_shape = (it->second)[0];
std::vector<int> opt_shape = (it->second)[1];
std::vector<int> max_shape = (it->second)[2];
runtime_option.SetTrtInputShape(it->first, min_shape, opt_shape,
max_shape);
}
runtime_option.SetTrtCacheFile("paddle.trt");
runtime_option.EnablePaddleTrtCollectShape();
runtime_option.DisablePaddleTrtOPs(disable_paddle_trt_ops);
if (use_fp16) {
runtime_option.EnableTrtFP16();
}
}
std::unique_ptr<fastdeploy::Runtime> runtime =
std::unique_ptr<fastdeploy::Runtime>(new fastdeploy::Runtime());
Expand All @@ -59,6 +89,20 @@ CreateRuntime(const std::string& model_file, const std::string& params_file,
}

int main() {
// 0. Init all configs
std::string model_dir = "sd15_inpaint";
int max_length = 77;
bool use_trt_backend = true;
bool use_fp16 = true;
int batch_size = 1;
int num_images_per_prompt = 1;
int num_inference_steps = 50;

int height = 512;
int width = 512;
constexpr int unet_inpaint_channels = 9;
constexpr int latents_channels = 4;

// 1. Init scheduler
std::unique_ptr<fastdeploy::Scheduler> dpm(
new fastdeploy::DPMSolverMultistepScheduler(
Expand All @@ -77,54 +121,116 @@ int main() {
/* lower_order_final = */ true));

// 2. Init text encoder runtime
std::string text_model_file = "sd15_inpaint/text_encoder/inference.pdmodel";
std::string text_params_file =
"sd15_inpaint/text_encoder/inference.pdiparams";
std::unordered_map<std::string, std::vector<std::vector<int>>>
text_dynamic_shape = {{"input_ids",
{/* min_shape */ {1, max_length},
/* opt_shape */ {batch_size, max_length},
/* max_shape */ {2 * batch_size, max_length}}}};
std::string text_model_dir = model_dir + sep + "text_encoder";
std::string text_model_file = text_model_dir + sep + "inference.pdmodel";
std::string text_params_file = text_model_dir + sep + "inference.pdiparams";
std::unique_ptr<fastdeploy::Runtime> text_encoder_runtime =
CreateRuntime(text_model_file, text_params_file, false);
CreateRuntime(text_model_file, text_params_file, use_trt_backend,
use_fp16, text_dynamic_shape);

// 3. Init vae encoder runtime
std::unordered_map<std::string, std::vector<std::vector<int>>>
vae_encoder_dynamic_shape = {
{"sample",
{/* min_shape */ {1, 3, height, width},
/* opt_shape */ {2 * batch_size, 3, height, width},
/* max_shape */ {2 * batch_size, 3, height, width}}}};
std::string vae_encoder_model_dir = model_dir + sep + "vae_encoder";
std::string vae_encoder_model_file =
"sd15_inpaint/vae_encoder/inference.pdmodel";
vae_encoder_model_dir + sep + "inference.pdmodel";
std::string vae_encoder_params_file =
"sd15_inpaint/vae_encoder/inference.pdiparams";
vae_encoder_model_dir + sep + "inference.pdiparams";
std::unique_ptr<fastdeploy::Runtime> vae_encoder_runtime =
CreateRuntime(vae_encoder_model_file, vae_encoder_params_file);
CreateRuntime(vae_encoder_model_file, vae_encoder_params_file,
use_trt_backend, use_fp16, vae_encoder_dynamic_shape);

// 4. Init vae decoder runtime
std::unordered_map<std::string, std::vector<std::vector<int>>>
vae_decoder_dynamic_shape = {
{"latent_sample",
{/* min_shape */ {1, latents_channels, height / 8, width / 8},
/* opt_shape */
{2 * batch_size, latents_channels, height / 8, width / 8},
/* max_shape */
{2 * batch_size, latents_channels, height / 8, width / 8}}}};
std::string vae_decoder_model_dir = model_dir + sep + "vae_decoder";
std::string vae_decoder_model_file =
"sd15_inpaint/vae_decoder/inference.pdmodel";
vae_decoder_model_dir + sep + "inference.pdmodel";
std::string vae_decoder_params_file =
"sd15_inpaint/vae_decoder/inference.pdiparams";
vae_decoder_model_dir + sep + "inference.pdiparams";
std::unique_ptr<fastdeploy::Runtime> vae_decoder_runtime =
CreateRuntime(vae_decoder_model_file, vae_decoder_params_file);
CreateRuntime(vae_decoder_model_file, vae_decoder_params_file,
use_trt_backend, use_fp16, vae_decoder_dynamic_shape);

// 5. Init unet runtime
std::string unet_model_file = "sd15_inpaint/unet/inference.pdmodel";
std::string unet_params_file = "sd15_inpaint/unet/inference.pdiparams";
std::unordered_map<std::string, std::vector<std::vector<int>>>
unet_dynamic_shape = {
{"sample",
{/* min_shape */ {1, unet_inpaint_channels, height / 8, width / 8},
/* opt_shape */
{2 * batch_size, unet_inpaint_channels, height / 8, width / 8},
/* max_shape */
{2 * batch_size, unet_inpaint_channels, height / 8, width / 8}}},
{"timesteps", {{1}, {1}, {1}}},
{"encoder_hidden_states",
{{1, max_length, 768},
{2 * batch_size, max_length, 768},
{2 * batch_size, max_length, 768}}}};
std::vector<std::string> unet_disable_paddle_trt_ops = {"sin", "cos"};
std::string unet_model_dir = model_dir + sep + "unet";
std::string unet_model_file = unet_model_dir + sep + "inference.pdmodel";
std::string unet_params_file = unet_model_dir + sep + "inference.pdiparams";
std::unique_ptr<fastdeploy::Runtime> unet_runtime =
CreateRuntime(unet_model_file, unet_params_file);
CreateRuntime(unet_model_file, unet_params_file, use_trt_backend,
use_fp16, unet_dynamic_shape, unet_disable_paddle_trt_ops);

// 6. Init fast tokenizer
paddlenlp::fast_tokenizer::tokenizers_impl::ClipFastTokenizer tokenizer(
"clip/vocab.json", "clip/merges.txt", /* max_length = */ 77);
"clip/vocab.json", "clip/merges.txt", /* max_length = */ max_length);
fastdeploy::StableDiffusionInpaintPipeline pipe(
std::move(vae_encoder_runtime), std::move(vae_decoder_runtime),
std::move(text_encoder_runtime), std::move(unet_runtime),
/* scheduler = */ std::move(dpm), tokenizer);
/* vae_encoder = */ std::move(vae_encoder_runtime),
/* vae_decoder = */ std::move(vae_decoder_runtime),
/* text_encoder = */ std::move(text_encoder_runtime),
/* unet = */ std::move(unet_runtime),
/* scheduler = */ std::move(dpm),
/* tokenizer = */ tokenizer);

// 7. Read images
auto image = cv::imread("overture-creations.png");
auto mask_image = cv::imread("overture-creations-mask.png");

// 8. Predict
/*
* One may need to pass the initial noise to predict api.
* There's an example:
* std::vector<float> latents_data = {xxxx};
* fastdeploy::FDTensor latents;
* latents.SetExternalData({batch_size * num_images_per_prompt, latents_channels, height / 8, width / 8},fastdeploy::FDDataType::FP32, latents_data.data());
* pipe.Predict(..., /* latents = *\/ &latents, ....);
*/
std::vector<std::string> prompts = {
"Face of a yellow cat, high resolution, sitting on a park bench"};
std::vector<fastdeploy::FDTensor> outputs;
fastdeploy::TimeCounter tc;
tc.Start();
pipe.Predict(prompts, image, mask_image, &outputs, /* height = */ 512,
/* width = */ 512, /* num_inference_steps = */ 50);
pipe.Predict(prompts, image, mask_image, &outputs,
/* height = */ height,
/* width = */ width,
/* num_inference_steps = */ num_inference_steps,
/* guidance_scale = */ 7.5,
/* negative_prompt = */ {},
/* num_images_per_prompt = */ num_images_per_prompt,
/* eta = */ 1.0,
/* max_length = */ max_length,
/* latents = */ nullptr,
/* output_cv_mat = */ true,
/* callback = */ nullptr,
/* callback_steps = */ 1);
tc.End();
tc.PrintInfo();
fastdeploy::vision::FDMat mat = fastdeploy::vision::FDMat::Create(outputs[0]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ void StableDiffusionInpaintPipeline::PrepareMaskAndMaskedImage(
float_mask[i] = 1;
}
}
image_mask.SetExternalData({1, 1, shape[1] * 8, shape[0] * 8},
// NCHW format
image_mask.SetExternalData({1, 1, shape[0] * 8, shape[1] * 8},
FDDataType::FP32, float_mask.data());

// Set mask_image
Expand Down Expand Up @@ -314,9 +315,6 @@ void StableDiffusionInpaintPipeline::Predict(
vision::FDMat mask_fdmat_t = vision::FDMat::Create((*output_images)[i]);
vision::RGB2BGR::Run(&mask_fdmat_t, vision::ProcLib::OPENCV);
mask_fdmat_t.CopyToTensor(&(*output_images)[i]);
FDTensor sum;
function::Sum((*output_images)[i], &sum, {}, false, true);
FDINFO << "sum = " << ((float*)sum.Data())[0] << std::endl;
}
}
} // namespace fastdeploy