app_multigpu.py

import os
import uuid
import gradio as gr
import subprocess
import tempfile
import shutil
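
# Gradio front-end for multi-GPU Pyramid Flow text-to-video generation.
# The UI only collects parameters; the actual inference runs in a separate
# process launched through scripts/app_multigpu_engine.sh.
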
def run_inference_multigpu(gpus, variant, model_path, temp, guidance_scale, video_guidance_scale, resolution, prompt):
    """
    Runs the external multi-GPU inference script and returns the path to the generated video.
    """
    # Create a temporary directory to store inputs and outputs
    with tempfile.TemporaryDirectory() as tmpdir:
        output_video = os.path.join(tmpdir, f"{uuid.uuid4()}_output.mp4")

        # Path to the external shell script
        script_path = "./scripts/app_multigpu_engine.sh"  # Updated script path

        # Prepare the command
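        # The engine script receives these values as positional arguments, in this exact order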
        cmd = [
            script_path,
            str(gpus),
            variant,
            model_path,
            't2v',  # Task is always 't2v' since 'i2v' is removed
            str(temp),
            str(guidance_scale),
            str(video_guidance_scale),
            resolution,
            output_video,
            prompt  # Pass the prompt directly as an argument
        ]

        try:
            # Run the external script
            subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"Error during video generation: {e}")

        # After generation, move the video to a permanent location
        final_output = os.path.join("generated_videos", f"{uuid.uuid4()}_output.mp4")
        os.makedirs("generated_videos", exist_ok=True)
        shutil.move(output_video, final_output)

        return final_output
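

# Map the UI's resolution choice onto the matching model variant, then delegate
# to run_inference_multigpu.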
def generate_text_to_video(prompt, temp, guidance_scale, video_guidance_scale, resolution, gpus):
    model_path = "./pyramid_flow_model"  # Use the model path as specified

    # Determine variant based on resolution
    if resolution == "768p":
        variant = "diffusion_transformer_768p"
    else:
        variant = "diffusion_transformer_384p"

    return run_inference_multigpu(gpus, variant, model_path, temp, guidance_scale, video_guidance_scale, resolution, prompt)


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Pyramid Flow Video Generation Demo

        Pyramid Flow is a training-efficient **Autoregressive Video Generation** model based on **Flow Matching**. It is trained only on open-source datasets within 20.7k A100 GPU hours.

        [[Paper]](https://arxiv.org/abs/2410.05954) [[Project Page]](https://pyramid-flow.github.io) [[Code]](https://github.com/jy0205/Pyramid-Flow) [[Model]](https://huggingface.co/rain1011/pyramid-flow-sd3)
        """
    )

    # Shared settings
    with gr.Row():
        gpus_dropdown = gr.Dropdown(
            choices=[2, 4],
            value=4,
            label="Number of GPUs"
        )
        resolution_dropdown = gr.Dropdown(
            choices=["768p", "384p"],
            value="768p",
            label="Model Resolution"
        )

    with gr.Tab("Text-to-Video"):
        with gr.Row():
            with gr.Column():
                text_prompt = gr.Textbox(
                    label="Prompt (Less than 128 words)",
                    placeholder="Enter a text prompt for the video",
                    lines=2
                )
                temp_slider = gr.Slider(1, 31, value=16, step=1, label="Duration")
                guidance_scale_slider = gr.Slider(1.0, 15.0, value=9.0, step=0.1, label="Guidance Scale")
                video_guidance_scale_slider = gr.Slider(1.0, 10.0, value=5.0, step=0.1, label="Video Guidance Scale")
                txt_generate = gr.Button("Generate Video")
            with gr.Column():
                txt_output = gr.Video(label="Generated Video")

        gr.Examples(
            examples=[
                [
                    "A movie trailer featuring the adventures of the 30 year old space man wearing a red wool knitted motorcycle helmet, blue sky, salt desert, cinematic style, shot on 35mm film, vivid colors",
                    16,
                    9.0,
                    5.0,
                    "768p",
                    4
                ],
                [
                    "Beautiful, snowy Tokyo city is bustling. The camera moves through the bustling city street, following several people enjoying the beautiful snowy weather and shopping at nearby stalls. Gorgeous sakura petals are flying through the wind along with snowflakes",
                    16,
                    9.0,
                    5.0,
                    "768p",
                    4
                ],
                [
                    "Extreme close-up of chicken and green pepper kebabs grilling on a barbeque with flames. Shallow focus and light smoke. vivid colours",
                    31,
                    9.0,
                    5.0,
                    "768p",
                    4
                ],
            ],
            inputs=[text_prompt, temp_slider, guidance_scale_slider, video_guidance_scale_slider, resolution_dropdown, gpus_dropdown],
            outputs=[txt_output],
            fn=generate_text_to_video,
            cache_examples='lazy',
        )

    # Update generate function for Text-to-Video
    txt_generate.click(
        generate_text_to_video,
        inputs=[
            text_prompt,
            temp_slider,
            guidance_scale_slider,
            video_guidance_scale_slider,
            resolution_dropdown,
            gpus_dropdown
        ],
        outputs=txt_output
    )
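
# Note: share=True also creates a temporary public Gradio link in addition to the
# local server; drop it (or pass share=False) to keep the demo local-only.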
# Launch Gradio app
demo.launch(share=True)