add fastdeploy+lite more devices support (#4042)
shentanyue authored Dec 9, 2022 · 1 parent 383210a · commit ba9f280
Showing 4 changed files with 179 additions and 40 deletions.
ppdiffusers/deploy/README.md: 3 changes (2 additions & 1 deletion)
@@ -42,7 +42,8 @@ python text_to_img_infer.py --model_dir stable-diffusion-v1-5/ --scheduler "eule
|----------|--------------|
| --model_dir | Directory of the exported model. |
| --model_format | Model format. Defaults to `'paddle'`; options: `['paddle', 'onnx']`. |
- | --backend | Inference engine backend. Defaults to `paddle`; options: `['onnx_runtime', 'paddle']`. When the model format is `onnx`, the only option is `['onnx_runtime']`. |
+ | --backend | Inference engine backend. Defaults to `paddle`; options: `['onnx_runtime', 'paddle', 'paddlelite']`. When the model format is `onnx`, the only option is `['onnx_runtime']`. |
+ | --device | Device to run on. Defaults to `gpu`; options: `['cpu', 'gpu', 'huawei_ascend_npu', 'kunlunxin_xpu']`. |
| --scheduler | Scheduler of the StableDiffusion model. Defaults to `'pndm'`; options: `['pndm', 'euler_ancestral']`. For the scheduler corresponding to each StableDiffusion model, see the [ppdiffusers model list](https://github.com/PaddlePaddle/PaddleNLP/tree/main/ppdiffusers#ppdiffusers%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81%E7%9A%84%E6%9D%83%E9%87%8D). |
| --unet_model_prefix | Prefix of the UNet model. Defaults to `unet`. |
| --vae_model_prefix | Prefix of the VAE model. Defaults to `vae_decoder`. |
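To make the new options concrete, here is a hypothetical invocation that combines them; the script and model directory follow the deploy examples in this repository, and any value from the `--device` list above can be substituted:

```shell
# Image-to-image inference on the Paddle Lite backend, targeting a Huawei Ascend NPU.
python img_to_img_infer.py \
    --model_dir stable-diffusion-v1-5/ \
    --backend paddlelite \
    --device huawei_ascend_npu \
    --scheduler pndm
```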
ppdiffusers/deploy/img_to_img_infer.py: 75 changes (61 additions & 14 deletions)
@@ -56,11 +56,21 @@ def parse_arguments():
type=str,
default="paddle",
# Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
+ choices=["onnx_runtime", "paddle", "paddlelite"],
+ help="The inference runtime backend of unet model and text encoder model.",
+ )
+ parser.add_argument(
+ "--device",
+ type=str,
+ default="gpu",
+ # Note(shentanyue): Will support more devices.
choices=[
- "onnx_runtime",
- "paddle",
+ "cpu",
+ "gpu",
+ "huawei_ascend_npu",
+ "kunlunxin_xpu",
],
- help="The inference runtime backend of unet model and text encoder model.",
+ help="The inference runtime device of models.",
)
parser.add_argument(
"--image_path", default="fd_astronaut_rides_horse.png", help="The path of the input image."
@@ -123,6 +133,25 @@ def create_paddle_inference_runtime(
return fd.Runtime(option)


+ def create_paddle_lite_runtime(model_dir, model_prefix, device="cpu", device_id=0):
+ option = fd.RuntimeOption()
+ option.use_lite_backend()
+ if device == "huawei_ascend_npu":
+ option.use_cann()
+ option.set_lite_nnadapter_device_names(["huawei_ascend_npu"])
+ option.set_lite_nnadapter_model_cache_dir(os.path.join(model_dir, model_prefix))
+ option.set_lite_nnadapter_context_properties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(device_id))
+ elif device == "kunlunxin_xpu":
+ # TODO(shentanyue): Add kunlunxin_xpu code
+ pass
+ else:
+ pass
+ model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
+ params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
+ option.set_model_path(model_file, params_file)
+ return fd.Runtime(option)
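For orientation, a minimal sketch of calling the new helper directly; the model directory is hypothetical, and the `<model_dir>/<model_prefix>/inference.pdmodel` layout is the one the helper itself assumes:

```python
# Hypothetical usage: build a Paddle Lite runtime for the UNet on Ascend NPU card 0.
# For any device other than "huawei_ascend_npu", the helper currently falls back
# to plain Lite execution with no NNAdapter configuration.
unet_runtime = create_paddle_lite_runtime(
    "stable-diffusion-v1-5", "unet", device="huawei_ascend_npu", device_id=0
)
```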


def create_trt_runtime(model_dir, model_prefix, model_format, workspace=(1 << 31), dynamic_shape=None, device_id=0):
option = fd.RuntimeOption()
option.use_trt_backend()
@@ -210,42 +239,45 @@ def get_scheduler(args):
}

# 4. Init runtime
+ device_id = args.device_id
+ if args.device == "cpu":
+ device_id = -1
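One subtlety in the remap above: `-1` is what the `create_*_runtime` helpers receive when `--device cpu` is requested, which keeps the runtime off the GPU. A minimal sketch of that convention, assuming FastDeploy's `fd.RuntimeOption.use_gpu`; the `bind_device` helper is illustrative, not part of this commit:

```python
def bind_device(option, device_id):
    # Illustrative convention only: a non-negative device_id selects that GPU
    # ordinal, while -1 leaves the runtime on the CPU.
    if device_id >= 0:
        option.use_gpu(device_id)
    return option
```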
if args.backend == "onnx_runtime":
text_encoder_runtime = create_ort_runtime(
- args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
+ args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
)
vae_decoder_runtime = create_ort_runtime(
- args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=args.device_id
+ args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=device_id
)
vae_encoder_runtime = create_ort_runtime(
- args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=args.device_id
+ args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=device_id
)
start = time.time()
unet_runtime = create_ort_runtime(
- args.model_dir, args.unet_model_prefix, args.model_format, device_id=args.device_id
+ args.model_dir, args.unet_model_prefix, args.model_format, device_id=device_id
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
elif args.backend == "paddle" or args.backend == "paddle-tensorrt":
use_trt = True if args.backend == "paddle-tensorrt" else False
# Note(zhoushunjie): Will change to paddle runtime later
text_encoder_runtime = create_ort_runtime(
- args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
+ args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
)
vae_decoder_runtime = create_paddle_inference_runtime(
args.model_dir,
args.vae_decoder_model_prefix,
use_trt,
vae_decoder_dynamic_shape,
use_fp16=args.use_fp16,
- device_id=args.device_id,
+ device_id=device_id,
)
vae_encoder_runtime = create_paddle_inference_runtime(
args.model_dir,
args.vae_encoder_model_prefix,
use_trt,
vae_encoder_dynamic_shape,
use_fp16=args.use_fp16,
- device_id=args.device_id,
+ device_id=device_id,
)
start = time.time()
unet_runtime = create_paddle_inference_runtime(
@@ -254,7 +286,7 @@ def get_scheduler(args):
use_trt,
unet_dynamic_shape,
use_fp16=args.use_fp16,
- device_id=args.device_id,
+ device_id=device_id,
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
elif args.backend == "tensorrt":
Expand All @@ -265,23 +297,38 @@ def get_scheduler(args):
args.model_format,
workspace=(1 << 30),
dynamic_shape=vae_decoder_dynamic_shape,
- device_id=args.device_id,
+ device_id=device_id,
)
vae_encoder_runtime = create_trt_runtime(
args.model_dir,
args.vae_encoder_model_prefix,
args.model_format,
workspace=(1 << 30),
dynamic_shape=vae_encoder_dynamic_shape,
- device_id=args.device_id,
+ device_id=device_id,
)
start = time.time()
unet_runtime = create_trt_runtime(
args.model_dir,
args.unet_model_prefix,
args.model_format,
dynamic_shape=unet_dynamic_shape,
- device_id=args.device_id,
+ device_id=device_id,
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
+ elif args.backend == "paddlelite":
+ text_encoder_runtime = create_paddle_lite_runtime(
+ args.model_dir, args.text_encoder_model_prefix, device=args.device, device_id=device_id
+ )
+ vae_decoder_runtime = create_paddle_lite_runtime(
+ args.model_dir, args.vae_decoder_model_prefix, device=args.device, device_id=device_id
+ )
+ vae_encoder_runtime = create_paddle_lite_runtime(
+ args.model_dir, args.vae_encoder_model_prefix, device=args.device, device_id=device_id
+ )
+ start = time.time()
+ unet_runtime = create_paddle_lite_runtime(
+ args.model_dir, args.unet_model_prefix, device=args.device, device_id=device_id
+ )
+ print(f"Spend {time.time() - start : .2f} s to load unet model.")
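To round out what these runtime objects are for once loading finishes, a rough sketch of driving one directly; the input name and shape are made-up placeholders, and the `infer()` signature (a dict mapping input names to numpy arrays) is an assumption about FastDeploy's `Runtime` API rather than something shown in this diff:

```python
import numpy as np

# Hypothetical smoke test; "latent_input" and the shape are placeholders.
sample = np.random.randn(1, 4, 64, 64).astype("float32")
outputs = unet_runtime.infer({"latent_input": sample})  # assumed fd.Runtime.infer signature
```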

ppdiffusers/deploy/inpaint_legacy_infer.py: 75 changes (61 additions & 14 deletions)
@@ -56,11 +56,21 @@ def parse_arguments():
type=str,
default="paddle",
# Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
+ choices=["onnx_runtime", "paddle", "paddlelite"],
+ help="The inference runtime backend of unet model and text encoder model.",
+ )
+ parser.add_argument(
+ "--device",
+ type=str,
+ default="gpu",
+ # Note(shentanyue): Will support more devices.
choices=[
- "onnx_runtime",
- "paddle",
+ "cpu",
+ "gpu",
+ "huawei_ascend_npu",
+ "kunlunxin_xpu",
],
- help="The inference runtime backend of unet model and text encoder model.",
+ help="The inference runtime device of models.",
)
parser.add_argument(
"--image_path", default="fd_astronaut_rides_horse.png", help="The path of the input image."
@@ -123,6 +133,25 @@ def create_paddle_inference_runtime(
return fd.Runtime(option)


+ def create_paddle_lite_runtime(model_dir, model_prefix, device="cpu", device_id=0):
+ option = fd.RuntimeOption()
+ option.use_lite_backend()
+ if device == "huawei_ascend_npu":
+ option.use_cann()
+ option.set_lite_nnadapter_device_names(["huawei_ascend_npu"])
+ option.set_lite_nnadapter_model_cache_dir(os.path.join(model_dir, model_prefix))
+ option.set_lite_nnadapter_context_properties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(device_id))
+ elif device == "kunlunxin_xpu":
+ # TODO(shentanyue): Add kunlunxin_xpu code
+ pass
+ else:
+ pass
+ model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
+ params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
+ option.set_model_path(model_file, params_file)
+ return fd.Runtime(option)
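The helper here is identical to the copy in img_to_img_infer.py; one extra illustration, then: any device outside the two named branches falls through to the bare `else` and runs on the default Lite (CPU) path. For example, with a hypothetical model directory and prefix:

```python
# Hits the final `else: pass` branch: plain Lite backend, no NNAdapter setup.
text_encoder_runtime = create_paddle_lite_runtime(
    "stable-diffusion-v1-5", "text_encoder", device="cpu"
)
```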


def create_trt_runtime(model_dir, model_prefix, model_format, workspace=(1 << 31), dynamic_shape=None, device_id=0):
option = fd.RuntimeOption()
option.use_trt_backend()
@@ -209,42 +238,45 @@ def get_scheduler(args):
}

# 4. Init runtime
+ device_id = args.device_id
+ if args.device == "cpu":
+ device_id = -1
if args.backend == "onnx_runtime":
text_encoder_runtime = create_ort_runtime(
- args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
+ args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
)
vae_decoder_runtime = create_ort_runtime(
- args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=args.device_id
+ args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=device_id
)
vae_encoder_runtime = create_ort_runtime(
- args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=args.device_id
+ args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=device_id
)
start = time.time()
unet_runtime = create_ort_runtime(
- args.model_dir, args.unet_model_prefix, args.model_format, device_id=args.device_id
+ args.model_dir, args.unet_model_prefix, args.model_format, device_id=device_id
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
elif args.backend == "paddle" or args.backend == "paddle-tensorrt":
use_trt = True if args.backend == "paddle-tensorrt" else False
# Note(zhoushunjie): Will change to paddle runtime later
text_encoder_runtime = create_ort_runtime(
- args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
+ args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
)
vae_decoder_runtime = create_paddle_inference_runtime(
args.model_dir,
args.vae_decoder_model_prefix,
use_trt,
vae_decoder_dynamic_shape,
use_fp16=args.use_fp16,
- device_id=args.device_id,
+ device_id=device_id,
)
vae_encoder_runtime = create_paddle_inference_runtime(
args.model_dir,
args.vae_encoder_model_prefix,
use_trt,
vae_encoder_dynamic_shape,
use_fp16=args.use_fp16,
- device_id=args.device_id,
+ device_id=device_id,
)
start = time.time()
unet_runtime = create_paddle_inference_runtime(
@@ -253,7 +285,7 @@ def get_scheduler(args):
use_trt,
unet_dynamic_shape,
use_fp16=args.use_fp16,
- device_id=args.device_id,
+ device_id=device_id,
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
elif args.backend == "tensorrt":
Expand All @@ -264,23 +296,38 @@ def get_scheduler(args):
args.model_format,
workspace=(1 << 30),
dynamic_shape=vae_decoder_dynamic_shape,
- device_id=args.device_id,
+ device_id=device_id,
)
vae_encoder_runtime = create_trt_runtime(
args.model_dir,
args.vae_encoder_model_prefix,
args.model_format,
workspace=(1 << 30),
dynamic_shape=vae_encoder_dynamic_shape,
- device_id=args.device_id,
+ device_id=device_id,
)
start = time.time()
unet_runtime = create_trt_runtime(
args.model_dir,
args.unet_model_prefix,
args.model_format,
dynamic_shape=unet_dynamic_shape,
- device_id=args.device_id,
+ device_id=device_id,
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
+ elif args.backend == "paddlelite":
+ text_encoder_runtime = create_paddle_lite_runtime(
+ args.model_dir, args.text_encoder_model_prefix, device=args.device, device_id=device_id
+ )
+ vae_decoder_runtime = create_paddle_lite_runtime(
+ args.model_dir, args.vae_decoder_model_prefix, device=args.device, device_id=device_id
+ )
+ vae_encoder_runtime = create_paddle_lite_runtime(
+ args.model_dir, args.vae_encoder_model_prefix, device=args.device, device_id=device_id
+ )
+ start = time.time()
+ unet_runtime = create_paddle_lite_runtime(
+ args.model_dir, args.unet_model_prefix, device=args.device, device_id=device_id
+ )
+ print(f"Spend {time.time() - start : .2f} s to load unet model.")
