+ """)
+
+ with gr.Accordion("🎥 What Is FastVideo?", open=False):
+ gr.HTML("""
+
+ <p>
+ FastVideo is an inference and post-training framework for diffusion models. It provides a unified end-to-end pipeline that spans data preprocessing, model training, finetuning, distillation, and inference. FastVideo is designed to be modular and extensible, so users can easily add new optimizations and techniques, whether training-free or applied during post-training.
+ </p>
+ <p>
+ The compute for this demo is generously provided by GMI Cloud. Note that this demo is meant to showcase FastWan's quality; under a large number of requests, generation speed may be affected. We also rate-limit users to 3 requests per minute.
+ </p>
+
+ """)
+
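+ # Show or hide the negative-prompt textbox when the checkbox is toggled.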
+ use_negative_prompt.change(
+ fn=lambda x: gr.update(visible=x),
+ inputs=use_negative_prompt,
+ outputs=negative_prompt,
+ )
+
+ def on_model_selection_change(selected_model):
+ if not selected_model:
+ selected_model = "FastWan2.1-T2V-1.3B"
+
+ model_path = MODEL_PATH_MAPPING.get(selected_model)
+
+ if model_path and model_path in default_params:
+ params = default_params[model_path]
+ return (
+ gr.update(value=params.height),
+ gr.update(value=params.width),
+ gr.update(value=params.num_frames),
+ gr.update(value=params.guidance_scale),
+ gr.update(value=params.seed),
+ )
+
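+ # Fall back to generic defaults when the selected model has no registered sampling parameters.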
+ return (
+ gr.update(value=448),
+ gr.update(value=832),
+ gr.update(value=61),
+ gr.update(value=3.0),
+ gr.update(value=1024),
+ )
+
+ model_selection.change(
+ fn=on_model_selection_change,
+ inputs=model_selection,
+ outputs=[height, width, num_frames, guidance_scale, seed],
+ )
+
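+ # NOTE: the order of *args must match the inputs list passed to run_button.click below.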
+ def handle_generation(*args, progress=None, request: gr.Request = None):
+ (model_selection, prompt, negative_prompt, use_negative_prompt, seed,
+ guidance_scale, num_frames, height, width, randomize_seed) = args
+
+ result_path, seed_or_error, timing_details = generate_video(
+ prompt, negative_prompt, use_negative_prompt, seed, guidance_scale,
+ num_frames, height, width, randomize_seed, model_selection, progress
+ )
+ if result_path and os.path.exists(result_path):
+ return (
+ result_path,
+ seed_or_error,
+ gr.update(visible=False),
+ gr.update(visible=True, value=timing_details),
+ )
+ else:
+ return (
+ None,
+ seed_or_error,
+ gr.update(visible=True, value=seed_or_error),
+ gr.update(visible=False),
+ )
+
+ run_button.click(
+ fn=handle_generation,
+ inputs=[
+ model_selection,
+ prompt,
+ negative_prompt,
+ use_negative_prompt,
+ seed,
+ guidance_scale,
+ num_frames,
+ height,
+ width,
+ randomize_seed,
+ ],
+ outputs=[result, seed_output, error_output, timing_display],
+ concurrency_limit=20,
+ )
+
+ return demo
+
+
+def main():
+ parser = argparse.ArgumentParser(description="FastVideo Gradio Local Demo")
+ parser.add_argument("--t2v_model_paths", type=str,
+ default="FastVideo/FastWan2.1-T2V-1.3B-Diffusers",
+ help="Comma separated list of paths to the T2V model(s)")
+ parser.add_argument("--host", type=str, default="0.0.0.0",
+ help="Host to bind to")
+ parser.add_argument("--port", type=int, default=7860,
+ help="Port to bind to")
+ args = parser.parse_args()
+ generators = {}
+ default_params = {}
+ model_paths = [p.strip() for p in args.t2v_model_paths.split(",")]
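+ # Load each model once at startup; keep a generator and its default sampling params per path.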
+ for model_path in model_paths:
+ print(f"Loading model: {model_path}")
+ setup_model_environment(model_path)
+ generators[model_path] = VideoGenerator.from_pretrained(model_path)
+ default_params[model_path] = SamplingParam.from_pretrained(model_path)
+ demo = create_gradio_interface(default_params, generators)
+ print(f"Starting Gradio frontend at http://{args.host}:{args.port}")
+ print(f"T2V Models: {args.t2v_model_paths}")
+
+ from fastapi import FastAPI, Request, HTTPException
+ from fastapi.responses import HTMLResponse, FileResponse
+ import uvicorn
+
+ app = FastAPI()
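+ # FastAPI serves the logo, favicon, and landing page; the Gradio UI is mounted under /gradio below.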
+
+ @app.get("/logo.png")
+ def get_logo():
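+ # Note: this route serves an SVG asset under the /logo.png path; the explicit media_type tells browsers how to render it.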
+ return FileResponse(
+ "assets/full.svg",
+ media_type="image/svg+xml",
+ headers={
+ "Cache-Control": "public, max-age=3600",
+ "Access-Control-Allow-Origin": "*"
+ }
+ )
+
+ @app.get("/favicon.ico")
+ def get_favicon():
+ favicon_path = "assets/icon-simple.svg"
+
+ if os.path.exists(favicon_path):
+ return FileResponse(
+ favicon_path,
+ media_type="image/svg+xml",
+ headers={
+ "Cache-Control": "public, max-age=3600",
+ "Access-Control-Allow-Origin": "*"
+ }
+ )
+ else:
+ raise HTTPException(status_code=404, detail="Favicon not found")
+
+ @app.get("/", response_class=HTMLResponse)
+ def index(request: Request):
+ base_url = str(request.base_url).rstrip('/')
+ return f"""
+
+
+
+
+
+
+ FastWan
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ """
+
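+ # allowed_paths lets Gradio serve generated videos and local logo assets from outside its static root.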
+ app = gr.mount_gradio_app(
+ app,
+ demo,
+ path="/gradio",
+ allowed_paths=[os.path.abspath("outputs"), os.path.abspath("fastvideo-logos")]
+ )
+
+ uvicorn.run(app, host=args.host, port=args.port)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/examples/inference/gradio/local/prompts_final.txt b/examples/inference/gradio/local/prompts_final.txt
new file mode 100644
index 000000000..8334cfbe1
--- /dev/null
+++ b/examples/inference/gradio/local/prompts_final.txt
@@ -0,0 +1,11 @@
+A dynamic shot of a sleek black motorcycle accelerating down an empty highway at sunset. The bike's engine roars as it gains speed, smoke trailing from the tires. The rider, wearing a black leather jacket and helmet, leans forward with determination, gripping the handlebars tightly. The camera follows the motorcycle from a distance, capturing the dust kicked up behind it, then zooms in to show the intense focus on the rider's face. The background showcases the endless road stretching into the horizon with vibrant orange and pink hues of the setting sun. Medium shot transitioning to close-up.
+A Jedi Master Yoda, recognizable by his green skin, large ears, and wise wrinkles, is performing on a small stage, strumming a guitar with great concentration. Yoda wears a casual robe and sits on a stool, his eyes closed as he plays, fully immersed in the music. The stage is dimly lit with spotlights highlighting Yoda, creating a mystical atmosphere. The background shows a live audience watching intently. Medium close-up shot focusing on Yoda's expressive face and hands moving gracefully over the guitar strings.
+A cute, fluffy panda bear is preparing a meal in a cozy, modern kitchen. The panda is standing at a wooden countertop, wearing a white chef’s hat and apron. It skillfully stirs a pot on the stove with one hand while holding a spatula in the other. The kitchen is well-lit, with appliances and cabinets in pastel colors, creating a warm and inviting atmosphere. The panda moves gracefully, with a focused and determined expression, as steam rises from the pot. Medium shot focusing on the panda’s actions at the stove.
+In a futuristic Tokyo rooftop during a heavy rainstorm, a robotic DJ stands behind a turntable, spinning vinyl records in a cyberpunk night setting. The robot has metallic, sleek body parts with glowing blue LED lights, and it moves gracefully with the beat. Raindrops create a shimmering effect as they hit the ground and the DJ. The surrounding environment features neon signs, towering skyscrapers, and a dark, misty atmosphere. The camera starts with a wide shot of the city skyline before zooming in on the DJ performing. Sci-fi, fantasy.
+A realistic animated scene featuring a polar bear playing a guitar. The polar bear is standing upright, wearing a cozy fur vest and fingerless gloves. It holds the guitar with both hands, strumming the strings with one hand while plucking them with the other, showcasing natural, fluid motions. The polar bear's expressive face shows concentration and joy as it plays. The background is a snowy Arctic landscape with icebergs and a clear blue sky. The scene captures the bear from a mid-shot angle, focusing on its interaction with the guitar.
+The scene opens to a breathtaking view of a tranquil ocean horizon at dusk, displaying a vibrant tapestry of oranges, pinks, and purples as the sun sets. In the foreground, tall, swaying palm trees frame the scene, their silhouettes stark against the colorful sky. The ocean itself shimmers with reflections of the sunset, creating a peaceful, almost ethereal atmosphere. A small boat can be seen in the distance, centered on the horizon, adding a sense of scale and solitude to the scene. The waves gently lap the shore, creating faint patterns on the sandy beach, which stretches across the foreground. Above, the sky is dotted with scattered clouds that catch the last light of the day, enhancing the drama and beauty of the scene. The overall mood is serene and contemplative, capturing a perfect moment of nature’s grandeur.
+A large, modern semi-truck accelerating down an empty highway, gaining speed with each second. The truck's powerful engine roars as it moves forward, smoke billowing from the tires. The camera starts from a wide shot, capturing the truck in the distance, then smoothly zooms in to follow the vehicle as it speeds up. The truck's headlights illuminate the road ahead, casting a bright glow. The truck driver can be seen through the windshield, focused and determined. The background shows the vast openness of the highway stretching into the horizon under a clear blue sky. Medium to close-up shots of the truck as it accelerates.
+Soft blue light pulses from the blade’s rune-etched hilt, illuminating nearby moss-covered roots and ferns. The surrounding trees are tall and gnarled, their branches curling like claws overhead. Fog swirls gently at ground level, parting slightly as a figure in a cloak approaches from the distance. Medium shot slowly zooming toward the sword, emphasizing its mystical aura.
+The video opens with a tranquil scene in the heart of a dense forest, emphasizing two large, textured tree trunks in the foreground framing the view. Sunlight filters through the canopy above, casting intricate patterns of light and shadow on the trees and the ground. Between the tree trunks, a clear view of a calm, muddy river unfolds, its surface shimmering under the gentle sunlight. The riverbank is decorated with a variety of small bushes and vibrant foliage, subtly transitioning into the deep greens of tall, leafy plants. In the background, the dense forest looms, filled with dark, towering trees, their branches intertwining to form an intricate canopy. The scene is bathed in the soft glow of the sun, creating a serene and picturesque setting. Occasional sunbeams pierce through the foliage, adding a magical aura to the landscape. The vibrant reds and oranges of the smaller plants add contrast, bringing warmth to the earthy tones of the scenery. Overall, this harmonious blend of natural elements creates a peaceful and idyllic forest setting.
+A lone figure stands on a large, moss-covered rock, surrounded by the soft rush of a nearby stream. The figure is wearing white sneakers and shorts, with a plaid shirt that hangs loosely in the breeze. The lighting creates dramatic shadows, enhancing the textures of the rock and the subtle movement of the water below. In the background, a waterfall cascades into the stream, completing this tranquil and serene nature scene.
+In an industrial setting, a person leans casually against a railing, exuding a sense of confidence and composure. They are wearing a striking outfit, consisting of a vibrant, patterned jacket over a simple white crop top, creating a bold contrast. The atmosphere is infused with warm, ambient lighting that casts soft shadows on the concrete walls and metallic surfaces. Intricate wiring and pipes form an intricate backdrop, enhancing the urban aesthetic. Their relaxed posture and direct, engaging gaze suggest a sense of ease in this industrial environment. This scene encapsulates a blend of modern fashion and gritty, urban architecture, creating a visually compelling narrative.
diff --git a/examples/inference/gradio/gradio_frontend.py b/examples/inference/gradio/serving/gradio_frontend.py
similarity index 100%
rename from examples/inference/gradio/gradio_frontend.py
rename to examples/inference/gradio/serving/gradio_frontend.py
diff --git a/examples/inference/gradio/ray_serve_backend.py b/examples/inference/gradio/serving/ray_serve_backend.py
similarity index 100%
rename from examples/inference/gradio/ray_serve_backend.py
rename to examples/inference/gradio/serving/ray_serve_backend.py
diff --git a/examples/inference/gradio/start.sh b/examples/inference/gradio/serving/start.sh
similarity index 100%
rename from examples/inference/gradio/start.sh
rename to examples/inference/gradio/serving/start.sh
diff --git a/examples/inference/gradio/start_ray_serve_app.py b/examples/inference/gradio/serving/start_ray_serve_app.py
similarity index 100%
rename from examples/inference/gradio/start_ray_serve_app.py
rename to examples/inference/gradio/serving/start_ray_serve_app.py
diff --git a/fastvideo/entrypoints/video_generator.py b/fastvideo/entrypoints/video_generator.py
index a24ea4554..3a29eb016 100644
--- a/fastvideo/entrypoints/video_generator.py
+++ b/fastvideo/entrypoints/video_generator.py
@@ -8,6 +8,7 @@
import math
import os
+import re
import time
from copy import deepcopy
from typing import Any
@@ -110,7 +111,7 @@ def generate_video(
prompt: The prompt to use for generation (optional if prompt_txt is provided)
negative_prompt: The negative prompt to use (overrides the one in fastvideo_args)
output_path: Path to save the video (overrides the one in fastvideo_args)
- output_video_name: Name of the video file to save. Default is the first 100 characters of the prompt.
+ prompt_path: Path to a text file of prompts for batch generation (one prompt per line)
save_video: Whether to save the video to disk
return_frames: Whether to return the raw frames
num_inference_steps: Number of denoising steps (overrides fastvideo_args)
@@ -127,8 +128,13 @@ def generate_video(
Either the output dictionary, list of frames, or list of results for batch processing
"""
# Handle batch processing from text file
- if self.fastvideo_args.prompt_txt is not None:
- prompt_txt_path = self.fastvideo_args.prompt_txt
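+ # Normalize sampling parameters up front so the batch and single-prompt paths share them.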
+ if sampling_param is None:
+ sampling_param = SamplingParam.from_pretrained(
+ self.fastvideo_args.model_path)
+ sampling_param.update(kwargs)
+
+ if self.fastvideo_args.prompt_txt is not None or sampling_param.prompt_path is not None:
+ prompt_txt_path = sampling_param.prompt_path or self.fastvideo_args.prompt_txt
if not os.path.exists(prompt_txt_path):
raise FileNotFoundError(
f"Prompt text file not found: {prompt_txt_path}")
@@ -142,22 +148,19 @@ def generate_video(
logger.info("Found %d prompts in %s", len(prompts), prompt_txt_path)
- if sampling_param is not None:
- original_output_video_name = sampling_param.output_video_name
- else:
- original_output_video_name = None
-
results = []
for i, batch_prompt in enumerate(prompts):
logger.info("Processing prompt %d/%d: %s...", i + 1,
len(prompts), batch_prompt[:100])
-
try:
# Generate video for this prompt using the same logic below
- if sampling_param is not None and original_output_video_name is not None:
- sampling_param.output_video_name = original_output_video_name + f"_{i}"
+ output_path = self._prepare_output_path(
+ sampling_param.output_path, batch_prompt)
+ kwargs["output_path"] = output_path
result = self._generate_single_video(
- batch_prompt, sampling_param, **kwargs)
+ prompt=batch_prompt,
+ sampling_param=sampling_param,
+ **kwargs)
# Add prompt info to result
if isinstance(result, dict):
@@ -181,8 +184,40 @@ def generate_video(
# Single prompt generation (original behavior)
if prompt is None:
raise ValueError("Either prompt or prompt_txt must be provided")
-
- return self._generate_single_video(prompt, sampling_param, **kwargs)
+ output_path = self._prepare_output_path(sampling_param.output_path,
+ prompt)
+ kwargs["output_path"] = output_path
+ return self._generate_single_video(prompt=prompt,
+ sampling_param=sampling_param,
+ **kwargs)
+
+ def _prepare_output_path(
+ self,
+ output_path: str,
+ prompt: str,
+ ) -> str:
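+ """Resolve the final .mp4 output path for a generation.
+
+ If ``output_path`` ends in ``.mp4`` it is treated as the target file and
+ sanitized; otherwise it is treated as a directory and the filename is
+ derived from the first 100 characters of the prompt. A numeric suffix is
+ appended if the path already exists, so earlier videos are never
+ overwritten.
+ """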
+ _, extension = os.path.splitext(output_path)
+ if extension == ".mp4":
+ output_dir = os.path.dirname(output_path)
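+ # Strip characters that are invalid in filenames on common filesystems.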
+ video_name = re.sub(r'[\/:*?"<>|]', '',
+ os.path.basename(output_path))
+ if video_name != os.path.basename(output_path):
+ logger.warning(
+ "The video name '%s' contained invalid characters; it has been renamed to '%s'",
+ os.path.basename(output_path), video_name)
+ else:
+ output_dir = output_path
+ video_name = re.sub(r'[\/:*?"<>|]', '', prompt[:100] + ".mp4")
+ if output_dir:
+ os.makedirs(output_dir, exist_ok=True)
+ new_output_path = os.path.join(output_dir, video_name)
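+ # Avoid clobbering existing files by appending a numeric suffix.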
+ counter = 1
+ while os.path.exists(new_output_path):
+ name_part, ext_part = os.path.splitext(video_name)
+ new_video_name = f"{name_part}_{counter}{ext_part}"
+ new_output_path = os.path.join(output_dir, new_video_name)
+ counter += 1
+ return new_output_path
def _generate_single_video(
self,
@@ -200,15 +235,9 @@ def _generate_single_video(
raise TypeError(
f"`prompt` must be a string, but got {type(prompt)}")
prompt = prompt.strip()
- if sampling_param is None:
- sampling_param = SamplingParam.from_pretrained(
- fastvideo_args.model_path)
- else:
- sampling_param = deepcopy(sampling_param)
-
- kwargs["prompt"] = prompt
- sampling_param.update(kwargs)
-
+ sampling_param = deepcopy(sampling_param)
+ output_path = kwargs["output_path"]
+ sampling_param.prompt = prompt
# Process negative prompt
if sampling_param.negative_prompt is not None:
sampling_param.negative_prompt = sampling_param.negative_prompt.strip(
@@ -277,7 +306,7 @@ def _generate_single_video(
height: {target_height}
width: {target_width}
video_length: {sampling_param.num_frames}
- prompt: {prompt}
+ prompt: {sampling_param.prompt}
image_path: {sampling_param.image_path}
neg_prompt: {sampling_param.negative_prompt}
seed: {sampling_param.seed}
@@ -288,7 +317,7 @@ def _generate_single_video(
flow_shift: {fastvideo_args.pipeline_config.flow_shift}
embedded_guidance_scale: {fastvideo_args.pipeline_config.embedded_cfg_scale}
save_video: {sampling_param.save_video}
- output_path: {sampling_param.output_path}
+ output_path: {output_path}
""" # type: ignore[attr-defined]
logger.info(debug_str)
@@ -301,10 +330,6 @@ def _generate_single_video(
extra={},
)
- # Use prompt[:100] for video name
- if batch.output_video_name is None:
- batch.output_video_name = prompt[:100]
-
# Run inference
start_time = time.perf_counter()
output_batch = self.executor.execute_forward(batch, fastvideo_args)
@@ -324,15 +349,8 @@ def _generate_single_video(
# Save video if requested
if batch.save_video:
- output_path = batch.output_path
- if output_path:
- os.makedirs(output_path, exist_ok=True)
- video_path = os.path.join(output_path,
- f"{batch.output_video_name}.mp4")
- imageio.mimsave(video_path, frames, fps=batch.fps, format="mp4")
- logger.info("Saved video to %s", video_path)
- else:
- logger.warning("No output path provided, video not saved")
+ imageio.mimsave(output_path, frames, fps=batch.fps, format="mp4")
+ logger.info("Saved video to %s", output_path)
if batch.return_frames:
return frames
diff --git a/fastvideo/platforms/cuda.py b/fastvideo/platforms/cuda.py
index 20ca613fb..46caa2a1a 100644
--- a/fastvideo/platforms/cuda.py
+++ b/fastvideo/platforms/cuda.py
@@ -158,7 +158,9 @@ def get_attn_backend_cls(cls, selected_backend: AttentionBackendEnum | None,
"Failed to import Video Sparse Attention backend: %s",
str(e))
raise ImportError(
- "Video Sparse Attention backend is not installed. ") from e
+ "The Video Sparse Attention backend is not installed.To install it, please follow the instructions at: https://hao-ai-lab.github.io/FastVideo/video_sparse_attention/installation.html "
+ ) from e
+
elif selected_backend == AttentionBackendEnum.TORCH_SDPA:
logger.info("Using Torch SDPA backend.")
return "fastvideo.attention.backends.sdpa.SDPABackend"