diff --git a/ppdiffusers/deploy/README.md b/ppdiffusers/deploy/README.md
index d7ae3c81e65b..c63a45b6156f 100644
--- a/ppdiffusers/deploy/README.md
+++ b/ppdiffusers/deploy/README.md
@@ -42,7 +42,8 @@ python text_to_img_infer.py --model_dir stable-diffusion-v1-5/ --scheduler "eule
 |----------|--------------|
 | --model_dir | Directory of the exported model. |
 | --model_format | Model format. Defaults to `'paddle'`; options: `['paddle', 'onnx']`. |
-| --backend | Inference runtime backend. Defaults to `paddle`; options: `['onnx_runtime', 'paddle']`. When the model format is `onnx`, the only option is `['onnx_runtime']`. |
+| --backend | Inference runtime backend. Defaults to `paddle`; options: `['onnx_runtime', 'paddle', 'paddlelite']`. When the model format is `onnx`, the only option is `['onnx_runtime']`. |
+| --device | Device to run inference on. Defaults to `gpu`; options: `['cpu', 'gpu', 'huawei_ascend_npu', 'kunlunxin_xpu']`. |
 | --scheduler | Scheduler of the StableDiffusion model. Defaults to `'pndm'`; options: `['pndm', 'euler_ancestral']`. For the scheduler matching each StableDiffusion model, see the [ppdiffusers model list](https://github.com/PaddlePaddle/PaddleNLP/tree/main/ppdiffusers#ppdiffusers%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81%E7%9A%84%E6%9D%83%E9%87%8D). |
 | --unet_model_prefix | Prefix of the UNet model. Defaults to `unet`. |
 | --vae_model_prefix | Prefix of the VAE model. Defaults to `vae_decoder`. |
diff --git a/ppdiffusers/deploy/img_to_img_infer.py b/ppdiffusers/deploy/img_to_img_infer.py
index e1b1704d0a3c..ec87f94d73f4 100644
--- a/ppdiffusers/deploy/img_to_img_infer.py
+++ b/ppdiffusers/deploy/img_to_img_infer.py
@@ -56,11 +56,21 @@ def parse_arguments():
         type=str,
         default="paddle",
         # Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
+        choices=["onnx_runtime", "paddle", "paddlelite"],
+        help="The inference runtime backend of unet model and text encoder model.",
+    )
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="gpu",
+        # Note(shentanyue): Will support more devices.
         choices=[
-            "onnx_runtime",
-            "paddle",
+            "cpu",
+            "gpu",
+            "huawei_ascend_npu",
+            "kunlunxin_xpu",
         ],
-        help="The inference runtime backend of unet model and text encoder model.",
+        help="The inference runtime device of models.",
     )
     parser.add_argument(
         "--image_path", default="fd_astronaut_rides_horse.png", help="The model directory of diffusion_model."
@@ -123,6 +133,25 @@ def create_paddle_inference_runtime(
     return fd.Runtime(option)
 
 
+def create_paddle_lite_runtime(model_dir, model_prefix, device="cpu", device_id=0):
+    option = fd.RuntimeOption()
+    option.use_lite_backend()
+    if device == "huawei_ascend_npu":
+        option.use_cann()
+        option.set_lite_nnadapter_device_names(["huawei_ascend_npu"])
+        option.set_lite_nnadapter_model_cache_dir(os.path.join(model_dir, model_prefix))
+        option.set_lite_nnadapter_context_properties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(device_id))
+    elif device == "kunlunxin_xpu":
+        # TODO(shentanyue): Add kunlunxin_xpu code
+        pass
+    else:
+        pass
+    model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
+    params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
+    option.set_model_path(model_file, params_file)
+    return fd.Runtime(option)
+
+
 def create_trt_runtime(model_dir, model_prefix, model_format, workspace=(1 << 31), dynamic_shape=None, device_id=0):
     option = fd.RuntimeOption()
     option.use_trt_backend()
@@ -210,26 +239,29 @@ def get_scheduler(args):
     }
 
     # 4. Init runtime
+    device_id = args.device_id
+    if args.device == "cpu":
+        device_id = -1
     if args.backend == "onnx_runtime":
         text_encoder_runtime = create_ort_runtime(
-            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
         )
         vae_decoder_runtime = create_ort_runtime(
-            args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=device_id
         )
         vae_encoder_runtime = create_ort_runtime(
-            args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=device_id
         )
         start = time.time()
         unet_runtime = create_ort_runtime(
-            args.model_dir, args.unet_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.unet_model_prefix, args.model_format, device_id=device_id
         )
         print(f"Spend {time.time() - start : .2f} s to load unet model.")
     elif args.backend == "paddle" or args.backend == "paddle-tensorrt":
         use_trt = True if args.backend == "paddle-tensorrt" else False
         # Note(zhoushunjie): Will change to paddle runtime later
         text_encoder_runtime = create_ort_runtime(
-            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
         )
         vae_decoder_runtime = create_paddle_inference_runtime(
             args.model_dir,
@@ -237,7 +269,7 @@ def get_scheduler(args):
             use_trt,
             vae_decoder_dynamic_shape,
             use_fp16=args.use_fp16,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         vae_encoder_runtime = create_paddle_inference_runtime(
             args.model_dir,
@@ -245,7 +277,7 @@ def get_scheduler(args):
             use_trt,
             vae_encoder_dynamic_shape,
             use_fp16=args.use_fp16,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         start = time.time()
         unet_runtime = create_paddle_inference_runtime(
@@ -254,7 +286,7 @@ def get_scheduler(args):
             use_trt,
             unet_dynamic_shape,
             use_fp16=args.use_fp16,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         print(f"Spend {time.time() - start : .2f} s to load unet model.")
     elif args.backend == "tensorrt":
@@ -265,7 +297,7 @@ def get_scheduler(args):
             args.model_format,
             workspace=(1 << 30),
             dynamic_shape=vae_decoder_dynamic_shape,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         vae_encoder_runtime = create_trt_runtime(
             args.model_dir,
@@ -273,7 +305,7 @@ def get_scheduler(args):
             args.model_format,
             workspace=(1 << 30),
             dynamic_shape=vae_encoder_dynamic_shape,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         start = time.time()
         unet_runtime = create_trt_runtime(
@@ -281,7 +313,22 @@ def get_scheduler(args):
             args.unet_model_prefix,
             args.model_format,
             dynamic_shape=unet_dynamic_shape,
-            device_id=args.device_id,
+            device_id=device_id,
+        )
+        print(f"Spend {time.time() - start : .2f} s to load unet model.")
+    elif args.backend == "paddlelite":
+        text_encoder_runtime = create_paddle_lite_runtime(
+            args.model_dir, args.text_encoder_model_prefix, device=args.device, device_id=device_id
+        )
+        vae_decoder_runtime = create_paddle_lite_runtime(
+            args.model_dir, args.vae_decoder_model_prefix, device=args.device, device_id=device_id
+        )
+        vae_encoder_runtime = create_paddle_lite_runtime(
+            args.model_dir, args.vae_encoder_model_prefix, device=args.device, device_id=device_id
+        )
+        start = time.time()
+        unet_runtime = create_paddle_lite_runtime(
+            args.model_dir, args.unet_model_prefix, device=args.device, device_id=device_id
         )
         print(f"Spend {time.time() - start : .2f} s to load unet model.")
 
diff --git a/ppdiffusers/deploy/inpaint_legacy_infer.py b/ppdiffusers/deploy/inpaint_legacy_infer.py
index 0c30ea021f28..530cff0fedae 100644
--- a/ppdiffusers/deploy/inpaint_legacy_infer.py
+++ b/ppdiffusers/deploy/inpaint_legacy_infer.py
@@ -56,11 +56,21 @@ def parse_arguments():
         type=str,
         default="paddle",
         # Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
+        choices=["onnx_runtime", "paddle", "paddlelite"],
+        help="The inference runtime backend of unet model and text encoder model.",
+    )
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="gpu",
+        # Note(shentanyue): Will support more devices.
         choices=[
-            "onnx_runtime",
-            "paddle",
+            "cpu",
+            "gpu",
+            "huawei_ascend_npu",
+            "kunlunxin_xpu",
         ],
-        help="The inference runtime backend of unet model and text encoder model.",
+        help="The inference runtime device of models.",
     )
     parser.add_argument(
         "--image_path", default="fd_astronaut_rides_horse.png", help="The model directory of diffusion_model."
@@ -123,6 +133,25 @@ def create_paddle_inference_runtime(
     return fd.Runtime(option)
 
 
+def create_paddle_lite_runtime(model_dir, model_prefix, device="cpu", device_id=0):
+    option = fd.RuntimeOption()
+    option.use_lite_backend()
+    if device == "huawei_ascend_npu":
+        option.use_cann()
+        option.set_lite_nnadapter_device_names(["huawei_ascend_npu"])
+        option.set_lite_nnadapter_model_cache_dir(os.path.join(model_dir, model_prefix))
+        option.set_lite_nnadapter_context_properties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(device_id))
+    elif device == "kunlunxin_xpu":
+        # TODO(shentanyue): Add kunlunxin_xpu code
+        pass
+    else:
+        pass
+    model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
+    params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
+    option.set_model_path(model_file, params_file)
+    return fd.Runtime(option)
+
+
 def create_trt_runtime(model_dir, model_prefix, model_format, workspace=(1 << 31), dynamic_shape=None, device_id=0):
     option = fd.RuntimeOption()
     option.use_trt_backend()
@@ -209,26 +238,29 @@ def get_scheduler(args):
     }
 
     # 4. Init runtime
+    device_id = args.device_id
+    if args.device == "cpu":
+        device_id = -1
     if args.backend == "onnx_runtime":
         text_encoder_runtime = create_ort_runtime(
-            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
         )
         vae_decoder_runtime = create_ort_runtime(
-            args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=device_id
        )
         vae_encoder_runtime = create_ort_runtime(
-            args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=device_id
         )
         start = time.time()
         unet_runtime = create_ort_runtime(
-            args.model_dir, args.unet_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.unet_model_prefix, args.model_format, device_id=device_id
         )
         print(f"Spend {time.time() - start : .2f} s to load unet model.")
     elif args.backend == "paddle" or args.backend == "paddle-tensorrt":
         use_trt = True if args.backend == "paddle-tensorrt" else False
         # Note(zhoushunjie): Will change to paddle runtime later
         text_encoder_runtime = create_ort_runtime(
-            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
         )
         vae_decoder_runtime = create_paddle_inference_runtime(
             args.model_dir,
@@ -236,7 +268,7 @@ def get_scheduler(args):
             use_trt,
             vae_decoder_dynamic_shape,
             use_fp16=args.use_fp16,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         vae_encoder_runtime = create_paddle_inference_runtime(
             args.model_dir,
@@ -244,7 +276,7 @@ def get_scheduler(args):
             use_trt,
             vae_encoder_dynamic_shape,
             use_fp16=args.use_fp16,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         start = time.time()
         unet_runtime = create_paddle_inference_runtime(
@@ -253,7 +285,7 @@ def get_scheduler(args):
             use_trt,
             unet_dynamic_shape,
             use_fp16=args.use_fp16,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         print(f"Spend {time.time() - start : .2f} s to load unet model.")
     elif args.backend == "tensorrt":
@@ -264,7 +296,7 @@ def get_scheduler(args):
             args.model_format,
             workspace=(1 << 30),
             dynamic_shape=vae_decoder_dynamic_shape,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         vae_encoder_runtime = create_trt_runtime(
             args.model_dir,
@@ -272,7 +304,7 @@ def get_scheduler(args):
             args.model_format,
             workspace=(1 << 30),
             dynamic_shape=vae_encoder_dynamic_shape,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         start = time.time()
         unet_runtime = create_trt_runtime(
@@ -280,7 +312,22 @@ def get_scheduler(args):
             args.unet_model_prefix,
             args.model_format,
             dynamic_shape=unet_dynamic_shape,
-            device_id=args.device_id,
+            device_id=device_id,
+        )
+        print(f"Spend {time.time() - start : .2f} s to load unet model.")
+    elif args.backend == "paddlelite":
+        text_encoder_runtime = create_paddle_lite_runtime(
+            args.model_dir, args.text_encoder_model_prefix, device=args.device, device_id=device_id
+        )
+        vae_decoder_runtime = create_paddle_lite_runtime(
+            args.model_dir, args.vae_decoder_model_prefix, device=args.device, device_id=device_id
+        )
+        vae_encoder_runtime = create_paddle_lite_runtime(
+            args.model_dir, args.vae_encoder_model_prefix, device=args.device, device_id=device_id
+        )
+        start = time.time()
+        unet_runtime = create_paddle_lite_runtime(
+            args.model_dir, args.unet_model_prefix, device=args.device, device_id=device_id
         )
         print(f"Spend {time.time() - start : .2f} s to load unet model.")
 
diff --git a/ppdiffusers/deploy/text_to_img_infer.py b/ppdiffusers/deploy/text_to_img_infer.py
index 7a2b431f6e53..1a3a1f4ee660 100644
--- a/ppdiffusers/deploy/text_to_img_infer.py
+++ b/ppdiffusers/deploy/text_to_img_infer.py
@@ -49,11 +49,21 @@ def parse_arguments():
         type=str,
         default="paddle",
         # Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
+        choices=["onnx_runtime", "paddle", "paddlelite"],
+        help="The inference runtime backend of unet model and text encoder model.",
+    )
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="gpu",
+        # Note(shentanyue): Will support more devices.
         choices=[
-            "onnx_runtime",
-            "paddle",
+            "cpu",
+            "gpu",
+            "huawei_ascend_npu",
+            "kunlunxin_xpu",
         ],
-        help="The inference runtime backend of unet model and text encoder model.",
+        help="The inference runtime device of models.",
     )
     parser.add_argument(
         "--image_path", default="fd_astronaut_rides_horse.png", help="The model directory of diffusion_model."
@@ -116,6 +126,25 @@ def create_paddle_inference_runtime(
     return fd.Runtime(option)
 
 
+def create_paddle_lite_runtime(model_dir, model_prefix, device="cpu", device_id=0):
+    option = fd.RuntimeOption()
+    option.use_lite_backend()
+    if device == "huawei_ascend_npu":
+        option.use_cann()
+        option.set_lite_nnadapter_device_names(["huawei_ascend_npu"])
+        option.set_lite_nnadapter_model_cache_dir(os.path.join(model_dir, model_prefix))
+        option.set_lite_nnadapter_context_properties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(device_id))
+    elif device == "kunlunxin_xpu":
+        # TODO(shentanyue): Add kunlunxin_xpu code
+        pass
+    else:
+        pass
+    model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
+    params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
+    option.set_model_path(model_file, params_file)
+    return fd.Runtime(option)
+
+
 def create_trt_runtime(model_dir, model_prefix, model_format, workspace=(1 << 31), dynamic_shape=None, device_id=0):
     option = fd.RuntimeOption()
     option.use_trt_backend()
@@ -195,23 +224,26 @@ def get_scheduler(args):
     }
 
     # 4. Init runtime
+    device_id = args.device_id
+    if args.device == "cpu":
+        device_id = -1
     if args.backend == "onnx_runtime":
         text_encoder_runtime = create_ort_runtime(
-            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
         )
         vae_decoder_runtime = create_ort_runtime(
-            args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=device_id
         )
         start = time.time()
         unet_runtime = create_ort_runtime(
-            args.model_dir, args.unet_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.unet_model_prefix, args.model_format, device_id=device_id
         )
         print(f"Spend {time.time() - start : .2f} s to load unet model.")
     elif args.backend == "paddle" or args.backend == "paddle-tensorrt":
         use_trt = True if args.backend == "paddle-tensorrt" else False
         # Note(zhoushunjie): Will change to paddle runtime later
         text_encoder_runtime = create_ort_runtime(
-            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
+            args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
         )
         vae_decoder_runtime = create_paddle_inference_runtime(
             args.model_dir,
@@ -219,7 +251,7 @@ def get_scheduler(args):
             use_trt,
             vae_dynamic_shape,
             use_fp16=args.use_fp16,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         start = time.time()
         unet_runtime = create_paddle_inference_runtime(
@@ -228,7 +260,7 @@ def get_scheduler(args):
             use_trt,
             unet_dynamic_shape,
             use_fp16=args.use_fp16,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         print(f"Spend {time.time() - start : .2f} s to load unet model.")
     elif args.backend == "tensorrt":
@@ -239,7 +271,7 @@ def get_scheduler(args):
             args.model_format,
             workspace=(1 << 30),
             dynamic_shape=vae_dynamic_shape,
-            device_id=args.device_id,
+            device_id=device_id,
         )
         start = time.time()
         unet_runtime = create_trt_runtime(
@@ -247,7 +279,19 @@ def get_scheduler(args):
             args.unet_model_prefix,
             args.model_format,
             dynamic_shape=unet_dynamic_shape,
-            device_id=args.device_id,
+            device_id=device_id,
+        )
+        print(f"Spend {time.time() - start : .2f} s to load unet model.")
+    elif args.backend == "paddlelite":
+        text_encoder_runtime = create_paddle_lite_runtime(
+            args.model_dir, args.text_encoder_model_prefix, device=args.device, device_id=device_id
+        )
+        vae_decoder_runtime = create_paddle_lite_runtime(
+            args.model_dir, args.vae_decoder_model_prefix, device=args.device, device_id=device_id
+        )
+        start = time.time()
+        unet_runtime = create_paddle_lite_runtime(
+            args.model_dir, args.unet_model_prefix, device=args.device, device_id=device_id
         )
         print(f"Spend {time.time() - start : .2f} s to load unet model.")
 
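To exercise the new flags end to end, here is a minimal invocation sketch; the exported model directory `stable-diffusion-v1-5/` is an assumption carried over from the README example above, and every flag value comes from the choices this patch adds:

```shell
# Sketch: text-to-image on an Ascend NPU through the new Paddle Lite backend.
# stable-diffusion-v1-5/ is the exported model directory from the README example.
python text_to_img_infer.py \
    --model_dir stable-diffusion-v1-5/ \
    --backend paddlelite \
    --device huawei_ascend_npu \
    --device_id 0 \
    --scheduler "pndm"
```

Note the CPU convention the patch introduces: with `--device cpu`, the scripts pass `device_id=-1` to every runtime constructor so that no accelerator is selected.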
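For reuse outside these scripts, below is a hedged usage sketch of `create_paddle_lite_runtime`, assuming the function has been copied into scope from one of the infer scripts and that the model export follows the layout the scripts expect (`<model_dir>/<prefix>/inference.pdmodel`). The introspection calls at the end are assumptions about the `fd.Runtime` Python API and should be verified against your FastDeploy version:

```python
import fastdeploy as fd  # FastDeploy, imported as `fd` in the infer scripts

# Build a Paddle Lite runtime for one sub-model (here the UNet), the same way
# the patch does for the `paddlelite` backend. On huawei_ascend_npu the helper
# also sets an NNAdapter model cache dir under <model_dir>/<prefix>, so repeat
# runs can reuse the converted model instead of converting it again.
unet_runtime = create_paddle_lite_runtime(
    model_dir="stable-diffusion-v1-5/",  # assumed export layout
    model_prefix="unet",
    device="huawei_ascend_npu",
    device_id=0,
)

# Inspect the runtime's inputs before calling unet_runtime.infer({...});
# num_inputs()/get_input_info() are assumed fd.Runtime introspection helpers.
for i in range(unet_runtime.num_inputs()):
    print(unet_runtime.get_input_info(i))
```

As the TODO in the patch notes, the `kunlunxin_xpu` branch is not configured yet, so only `huawei_ascend_npu` and the default Lite CPU path perform real device setup here.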