+ """)
+
+ with gr.Accordion("🎥 What Is FastVideo?", open=False):
+ gr.HTML("""
+
+ <p>
+ FastVideo is an inference and post-training framework for diffusion models. It provides a unified end-to-end pipeline that spans data preprocessing, model training, finetuning, distillation, and inference. FastVideo is designed to be modular and extensible, so users can easily add new optimizations and techniques, whether training-free or applied during post-training.
+ </p>
+ <p>
+ The compute for this demo is generously provided by GMI Cloud. Note that this demo is meant to showcase FastWan's quality; under a large number of requests, generation speed may be affected. We also rate-limit users to 3 requests per minute.
+ </p>
+
+ """)
+
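+ # Show or hide the negative-prompt textbox when the checkbox is toggled.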
+ use_negative_prompt.change(
+ fn=lambda x: gr.update(visible=x),
+ inputs=use_negative_prompt,
+ outputs=negative_prompt,
+ )
+
+ def on_model_selection_change(selected_model):
+ if not selected_model:
+ selected_model = "FastWan2.1-T2V-1.3B"
+
+ model_path = MODEL_PATH_MAPPING.get(selected_model)
+
+ if model_path and model_path in default_params:
+ params = default_params[model_path]
+ return (
+ gr.update(value=params.height),
+ gr.update(value=params.width),
+ gr.update(value=params.num_frames),
+ gr.update(value=params.guidance_scale),
+ gr.update(value=params.seed),
+ )
+
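+ # Fall back to generic defaults when the selected model has no registered sampling parameters.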
+ return (
+ gr.update(value=448),
+ gr.update(value=832),
+ gr.update(value=61),
+ gr.update(value=3.0),
+ gr.update(value=1024),
+ )
+
+ model_selection.change(
+ fn=on_model_selection_change,
+ inputs=model_selection,
+ outputs=[height, width, num_frames, guidance_scale, seed],
+ )
+
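+ # NOTE: the order of *args must match the inputs list passed to run_button.click below.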
+ def handle_generation(*args, progress=None, request: gr.Request = None):
+ (model_selection, prompt, negative_prompt, use_negative_prompt, seed,
+ guidance_scale, num_frames, height, width, randomize_seed) = args
+
+ result_path, seed_or_error, timing_details = generate_video(
+ prompt, negative_prompt, use_negative_prompt, seed, guidance_scale,
+ num_frames, height, width, randomize_seed, model_selection, progress
+ )
+ if result_path and os.path.exists(result_path):
+ return (
+ result_path,
+ seed_or_error,
+ gr.update(visible=False),
+ gr.update(visible=True, value=timing_details),
+ )
+ else:
+ return (
+ None,
+ seed_or_error,
+ gr.update(visible=True, value=seed_or_error),
+ gr.update(visible=False),
+ )
+
+ run_button.click(
+ fn=handle_generation,
+ inputs=[
+ model_selection,
+ prompt,
+ negative_prompt,
+ use_negative_prompt,
+ seed,
+ guidance_scale,
+ num_frames,
+ height,
+ width,
+ randomize_seed,
+ ],
+ outputs=[result, seed_output, error_output, timing_display],
+ concurrency_limit=20,
+ )
+
+ return demo
+
+
+def main():
+ parser = argparse.ArgumentParser(description="FastVideo Gradio Local Demo")
+ parser.add_argument("--t2v_model_paths", type=str,
+ default="FastVideo/FastWan2.1-T2V-1.3B-Diffusers",
+ help="Comma separated list of paths to the T2V model(s)")
+ parser.add_argument("--host", type=str, default="0.0.0.0",
+ help="Host to bind to")
+ parser.add_argument("--port", type=int, default=7860,
+ help="Port to bind to")
+ args = parser.parse_args()
+ generators = {}
+ default_params = {}
+ model_paths = [p.strip() for p in args.t2v_model_paths.split(",")]
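+ # Load each model once at startup; keep a generator and its default sampling params per path.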
+ for model_path in model_paths:
+ print(f"Loading model: {model_path}")
+ setup_model_environment(model_path)
+ generators[model_path] = VideoGenerator.from_pretrained(model_path)
+ default_params[model_path] = SamplingParam.from_pretrained(model_path)
+ demo = create_gradio_interface(default_params, generators)
+ print(f"Starting Gradio frontend at http://{args.host}:{args.port}")
+ print(f"T2V Models: {args.t2v_model_paths}")
+
+ from fastapi import FastAPI, Request, HTTPException
+ from fastapi.responses import HTMLResponse, FileResponse
+ import uvicorn
+
+ app = FastAPI()
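+ # FastAPI serves the logo, favicon, and landing page; the Gradio UI is mounted under /gradio below.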
+
+ @app.get("/logo.png")
+ def get_logo():
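+ # Note: this route serves an SVG asset under the /logo.png path; the explicit media_type tells browsers how to render it.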
+ return FileResponse(
+ "assets/full.svg",
+ media_type="image/svg+xml",
+ headers={
+ "Cache-Control": "public, max-age=3600",
+ "Access-Control-Allow-Origin": "*"
+ }
+ )
+
+ @app.get("/favicon.ico")
+ def get_favicon():
+ favicon_path = "assets/icon-simple.svg"
+
+ if os.path.exists(favicon_path):
+ return FileResponse(
+ favicon_path,
+ media_type="image/svg+xml",
+ headers={
+ "Cache-Control": "public, max-age=3600",
+ "Access-Control-Allow-Origin": "*"
+ }
+ )
+ else:
+ raise HTTPException(status_code=404, detail="Favicon not found")
+
+ @app.get("/", response_class=HTMLResponse)
+ def index(request: Request):
+ base_url = str(request.base_url).rstrip('/')
+ return f"""
+
+
+
+
+
+
+ FastWan
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ """
+
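+ # allowed_paths lets Gradio serve generated videos and local logo assets from outside its static root.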
+ app = gr.mount_gradio_app(
+ app,
+ demo,
+ path="/gradio",
+ allowed_paths=[os.path.abspath("outputs"), os.path.abspath("fastvideo-logos")]
+ )
+
+ uvicorn.run(app, host=args.host, port=args.port)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/examples/inference/gradio/local/prompts_final.txt b/examples/inference/gradio/local/prompts_final.txt
new file mode 100644
index 000000000..8334cfbe1
--- /dev/null
+++ b/examples/inference/gradio/local/prompts_final.txt
@@ -0,0 +1,11 @@
+A dynamic shot of a sleek black motorcycle accelerating down an empty highway at sunset. The bike's engine roars as it gains speed, smoke trailing from the tires. The rider, wearing a black leather jacket and helmet, leans forward with determination, gripping the handlebars tightly. The camera follows the motorcycle from a distance, capturing the dust kicked up behind it, then zooms in to show the intense focus on the rider's face. The background showcases the endless road stretching into the horizon with vibrant orange and pink hues of the setting sun. Medium shot transitioning to close-up.
+A Jedi Master Yoda, recognizable by his green skin, large ears, and wise wrinkles, is performing on a small stage, strumming a guitar with great concentration. Yoda wears a casual robe and sits on a stool, his eyes closed as he plays, fully immersed in the music. The stage is dimly lit with spotlights highlighting Yoda, creating a mystical atmosphere. The background shows a live audience watching intently. Medium close-up shot focusing on Yoda's expressive face and hands moving gracefully over the guitar strings.
+A cute, fluffy panda bear is preparing a meal in a cozy, modern kitchen. The panda is standing at a wooden countertop, wearing a white chef’s hat and apron. It skillfully stirs a pot on the stove with one hand while holding a spatula in the other. The kitchen is well-lit, with appliances and cabinets in pastel colors, creating a warm and inviting atmosphere. The panda moves gracefully, with a focused and determined expression, as steam rises from the pot. Medium shot focusing on the panda’s actions at the stove.
+In a futuristic Tokyo rooftop during a heavy rainstorm, a robotic DJ stands behind a turntable, spinning vinyl records in a cyberpunk night setting. The robot has metallic, sleek body parts with glowing blue LED lights, and it moves gracefully with the beat. Raindrops create a shimmering effect as they hit the ground and the DJ. The surrounding environment features neon signs, towering skyscrapers, and a dark, misty atmosphere. The camera starts with a wide shot of the city skyline before zooming in on the DJ performing. Sci-fi, fantasy.
+A realistic animated scene featuring a polar bear playing a guitar. The polar bear is standing upright, wearing a cozy fur vest and fingerless gloves. It holds the guitar with both hands, strumming the strings with one hand while plucking them with the other, showcasing natural, fluid motions. The polar bear's expressive face shows concentration and joy as it plays. The background is a snowy Arctic landscape with icebergs and a clear blue sky. The scene captures the bear from a mid-shot angle, focusing on its interaction with the guitar.
+The scene opens to a breathtaking view of a tranquil ocean horizon at dusk, displaying a vibrant tapestry of oranges, pinks, and purples as the sun sets. In the foreground, tall, swaying palm trees frame the scene, their silhouettes stark against the colorful sky. The ocean itself shimmers with reflections of the sunset, creating a peaceful, almost ethereal atmosphere. A small boat can be seen in the distance, centered on the horizon, adding a sense of scale and solitude to the scene. The waves gently lap the shore, creating faint patterns on the sandy beach, which stretches across the foreground. Above, the sky is dotted with scattered clouds that catch the last light of the day, enhancing the drama and beauty of the scene. The overall mood is serene and contemplative, capturing a perfect moment of nature’s grandeur.
+A large, modern semi-truck accelerating down an empty highway, gaining speed with each second. The truck's powerful engine roars as it moves forward, smoke billowing from the tires. The camera starts from a wide shot, capturing the truck in the distance, then smoothly zooms in to follow the vehicle as it speeds up. The truck's headlights illuminate the road ahead, casting a bright glow. The truck driver can be seen through the windshield, focused and determined. The background shows the vast openness of the highway stretching into the horizon under a clear blue sky. Medium to close-up shots of the truck as it accelerates.
+Soft blue light pulses from the blade’s rune-etched hilt, illuminating nearby moss-covered roots and ferns. The surrounding trees are tall and gnarled, their branches curling like claws overhead. Fog swirls gently at ground level, parting slightly as a figure in a cloak approaches from the distance. Medium shot slowly zooming toward the sword, emphasizing its mystical aura.
+The video opens with a tranquil scene in the heart of a dense forest, emphasizing two large, textured tree trunks in the foreground framing the view. Sunlight filters through the canopy above, casting intricate patterns of light and shadow on the trees and the ground. Between the tree trunks, a clear view of a calm, muddy river unfolds, its surface shimmering under the gentle sunlight. The riverbank is decorated with a variety of small bushes and vibrant foliage, subtly transitioning into the deep greens of tall, leafy plants. In the background, the dense forest looms, filled with dark, towering trees, their branches intertwining to form an intricate canopy. The scene is bathed in the soft glow of the sun, creating a serene and picturesque setting. Occasional sunbeams pierce through the foliage, adding a magical aura to the landscape. The vibrant reds and oranges of the smaller plants add contrast, bringing warmth to the earthy tones of the scenery. Overall, this harmonious blend of natural elements creates a peaceful and idyllic forest setting.
+A lone figure stands on a large, moss-covered rock, surrounded by the soft rush of a nearby stream. The figure is wearing white sneakers and shorts, with a plaid shirt that hangs loosely in the breeze. The lighting creates dramatic shadows, enhancing the textures of the rock and the subtle movement of the water below. In the background, a waterfall cascades into the stream, completing this tranquil and serene nature scene.
+In an industrial setting, a person leans casually against a railing, exuding a sense of confidence and composure. They are wearing a striking outfit, consisting of a vibrant, patterned jacket over a simple white crop top, creating a bold contrast. The atmosphere is infused with warm, ambient lighting that casts soft shadows on the concrete walls and metallic surfaces. Intricate wiring and pipes form an intricate backdrop, enhancing the urban aesthetic. Their relaxed posture and direct, engaging gaze suggest a sense of ease in this industrial environment. This scene encapsulates a blend of modern fashion and gritty, urban architecture, creating a visually compelling narrative.
diff --git a/examples/inference/gradio/gradio_frontend.py b/examples/inference/gradio/serving/gradio_frontend.py
similarity index 100%
rename from examples/inference/gradio/gradio_frontend.py
rename to examples/inference/gradio/serving/gradio_frontend.py
diff --git a/examples/inference/gradio/ray_serve_backend.py b/examples/inference/gradio/serving/ray_serve_backend.py
similarity index 100%
rename from examples/inference/gradio/ray_serve_backend.py
rename to examples/inference/gradio/serving/ray_serve_backend.py
diff --git a/examples/inference/gradio/start.sh b/examples/inference/gradio/serving/start.sh
similarity index 100%
rename from examples/inference/gradio/start.sh
rename to examples/inference/gradio/serving/start.sh
diff --git a/examples/inference/gradio/start_ray_serve_app.py b/examples/inference/gradio/serving/start_ray_serve_app.py
similarity index 100%
rename from examples/inference/gradio/start_ray_serve_app.py
rename to examples/inference/gradio/serving/start_ray_serve_app.py
diff --git a/fastvideo/entrypoints/video_generator.py b/fastvideo/entrypoints/video_generator.py
index a24ea4554..3a29eb016 100644
--- a/fastvideo/entrypoints/video_generator.py
+++ b/fastvideo/entrypoints/video_generator.py
@@ -8,6 +8,7 @@
import math
import os
+import re
import time
from copy import deepcopy
from typing import Any
@@ -110,7 +111,7 @@ def generate_video(
prompt: The prompt to use for generation (optional if prompt_txt is provided)
negative_prompt: The negative prompt to use (overrides the one in fastvideo_args)
output_path: Path to save the video (overrides the one in fastvideo_args)
- output_video_name: Name of the video file to save. Default is the first 100 characters of the prompt.
+ prompt_path: Path to a text file of prompts for batch generation (one prompt per line)
save_video: Whether to save the video to disk
return_frames: Whether to return the raw frames
num_inference_steps: Number of denoising steps (overrides fastvideo_args)
@@ -127,8 +128,13 @@ def generate_video(
Either the output dictionary, list of frames, or list of results for batch processing
"""
# Handle batch processing from text file
- if self.fastvideo_args.prompt_txt is not None:
- prompt_txt_path = self.fastvideo_args.prompt_txt
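+ # Normalize sampling parameters up front so the batch and single-prompt paths share them.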
+ if sampling_param is None:
+ sampling_param = SamplingParam.from_pretrained(
+ self.fastvideo_args.model_path)
+ sampling_param.update(kwargs)
+
+ if self.fastvideo_args.prompt_txt is not None or sampling_param.prompt_path is not None:
+ prompt_txt_path = sampling_param.prompt_path or self.fastvideo_args.prompt_txt
if not os.path.exists(prompt_txt_path):
raise FileNotFoundError(
f"Prompt text file not found: {prompt_txt_path}")
@@ -142,22 +148,19 @@ def generate_video(
logger.info("Found %d prompts in %s", len(prompts), prompt_txt_path)
- if sampling_param is not None:
- original_output_video_name = sampling_param.output_video_name
- else:
- original_output_video_name = None
-
results = []
for i, batch_prompt in enumerate(prompts):
logger.info("Processing prompt %d/%d: %s...", i + 1,
len(prompts), batch_prompt[:100])
-
try:
# Generate video for this prompt using the same logic below
- if sampling_param is not None and original_output_video_name is not None:
- sampling_param.output_video_name = original_output_video_name + f"_{i}"
+ output_path = self._prepare_output_path(
+ sampling_param.output_path, batch_prompt)
+ kwargs["output_path"] = output_path
result = self._generate_single_video(
- batch_prompt, sampling_param, **kwargs)
+ prompt=batch_prompt,
+ sampling_param=sampling_param,
+ **kwargs)
# Add prompt info to result
if isinstance(result, dict):
@@ -181,8 +184,40 @@ def generate_video(
# Single prompt generation (original behavior)
if prompt is None:
raise ValueError("Either prompt or prompt_txt must be provided")
-
- return self._generate_single_video(prompt, sampling_param, **kwargs)
+ output_path = self._prepare_output_path(sampling_param.output_path,
+ prompt)
+ kwargs["output_path"] = output_path
+ return self._generate_single_video(prompt=prompt,
+ sampling_param=sampling_param,
+ **kwargs)
+
+ def _prepare_output_path(
+ self,
+ output_path: str,
+ prompt: str,
+ ) -> str:
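+ """Resolve the final .mp4 output path for a generation.
+
+ If ``output_path`` ends in ``.mp4`` it is treated as the target file and
+ sanitized; otherwise it is treated as a directory and the filename is
+ derived from the first 100 characters of the prompt. A numeric suffix is
+ appended if the path already exists, so earlier videos are never
+ overwritten.
+ """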
+ _, extension = os.path.splitext(output_path)
+ if extension == ".mp4":
+ output_dir = os.path.dirname(output_path)
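+ # Strip characters that are invalid in filenames on common filesystems.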
+ video_name = re.sub(r'[\/:*?"<>|]', '',
+ os.path.basename(output_path))
+ if video_name != os.path.basename(output_path):
+ logger.warning(
+ "The video name '%s' contained invalid characters; it has been renamed to '%s'",
+ os.path.basename(output_path), video_name)
+ else:
+ output_dir = output_path
+ video_name = re.sub(r'[\/:*?"<>|]', '', prompt[:100] + ".mp4")
+ if output_dir:
+ os.makedirs(output_dir, exist_ok=True)
+ new_output_path = os.path.join(output_dir, video_name)
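+ # Avoid clobbering existing files by appending a numeric suffix.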
+ counter = 1
+ while os.path.exists(new_output_path):
+ name_part, ext_part = os.path.splitext(video_name)
+ new_video_name = f"{name_part}_{counter}{ext_part}"
+ new_output_path = os.path.join(output_dir, new_video_name)
+ counter += 1
+ return new_output_path
def _generate_single_video(
self,
@@ -200,15 +235,9 @@ def _generate_single_video(
raise TypeError(
f"`prompt` must be a string, but got {type(prompt)}")
prompt = prompt.strip()
- if sampling_param is None:
- sampling_param = SamplingParam.from_pretrained(
- fastvideo_args.model_path)
- else:
- sampling_param = deepcopy(sampling_param)
-
- kwargs["prompt"] = prompt
- sampling_param.update(kwargs)
-
+ sampling_param = deepcopy(sampling_param)
+ output_path = kwargs["output_path"]
+ sampling_param.prompt = prompt
# Process negative prompt
if sampling_param.negative_prompt is not None:
sampling_param.negative_prompt = sampling_param.negative_prompt.strip(
@@ -277,7 +306,7 @@ def _generate_single_video(
height: {target_height}
width: {target_width}
video_length: {sampling_param.num_frames}
- prompt: {prompt}
+ prompt: {sampling_param.prompt}
image_path: {sampling_param.image_path}
neg_prompt: {sampling_param.negative_prompt}
seed: {sampling_param.seed}
@@ -288,7 +317,7 @@ def _generate_single_video(
flow_shift: {fastvideo_args.pipeline_config.flow_shift}
embedded_guidance_scale: {fastvideo_args.pipeline_config.embedded_cfg_scale}
save_video: {sampling_param.save_video}
- output_path: {sampling_param.output_path}
+ output_path: {output_path}
""" # type: ignore[attr-defined]
logger.info(debug_str)
@@ -301,10 +330,6 @@ def _generate_single_video(
extra={},
)
- # Use prompt[:100] for video name
- if batch.output_video_name is None:
- batch.output_video_name = prompt[:100]
-
# Run inference
start_time = time.perf_counter()
output_batch = self.executor.execute_forward(batch, fastvideo_args)
@@ -324,15 +349,8 @@ def _generate_single_video(
# Save video if requested
if batch.save_video:
- output_path = batch.output_path
- if output_path:
- os.makedirs(output_path, exist_ok=True)
- video_path = os.path.join(output_path,
- f"{batch.output_video_name}.mp4")
- imageio.mimsave(video_path, frames, fps=batch.fps, format="mp4")
- logger.info("Saved video to %s", video_path)
- else:
- logger.warning("No output path provided, video not saved")
+ imageio.mimsave(output_path, frames, fps=batch.fps, format="mp4")
+ logger.info("Saved video to %s", output_path)
if batch.return_frames:
return frames
diff --git a/fastvideo/platforms/cuda.py b/fastvideo/platforms/cuda.py
index 20ca613fb..46caa2a1a 100644
--- a/fastvideo/platforms/cuda.py
+++ b/fastvideo/platforms/cuda.py
@@ -158,7 +158,9 @@ def get_attn_backend_cls(cls, selected_backend: AttentionBackendEnum | None,
"Failed to import Video Sparse Attention backend: %s",
str(e))
raise ImportError(
- "Video Sparse Attention backend is not installed. ") from e
+ "The Video Sparse Attention backend is not installed.To install it, please follow the instructions at: https://hao-ai-lab.github.io/FastVideo/video_sparse_attention/installation.html "
+ ) from e
+
elif selected_backend == AttentionBackendEnum.TORCH_SDPA:
logger.info("Using Torch SDPA backend.")
return "fastvideo.attention.backends.sdpa.SDPABackend"