pilarcode · pilarcode · Apr 4, 2025
diff --git a/src/multiagents/video_agent.py b/src/multiagents/video_agent.py
@@ -0,0 +1,132 @@
+import subprocess
+import time
+from pathlib import Path
+
+from agno.agent import Agent
+from agno.media import Video
+from agno.models.google import Gemini
+from agno.utils.log import logger
+from google.generativeai import get_file, upload_file
+
+video_path = Path(__file__).parent / "sample.mp4"  # sample clip that sits beside this module
+output_dir = Path("tmp") / "shorts"  # relative directory that receives the generated clips
+
+agent = Agent(
+    model=Gemini(id="gemini-2.0-flash-exp"),
+    name="Video2Shorts",
+    description="Process videos and generate engaging shorts.",
+    markdown=True,
+    debug_mode=True,
+    instructions=[  # the table layout requested here is what extract_segments() parses
+        "Analyze the provided video directly—do NOT reference or analyze any external sources or YouTube videos.",
+        "Identify engaging moments that meet the specified criteria for short-form content.",
+        """Provide your analysis in a **table format** with these columns:
+   - Start Time | End Time | Description | Importance Score""",
+        "Ensure all timestamps use MM:SS format and importance scores range from 1-10. ",
+        "Focus only on segments between 15 and 60 seconds long.",
+        "Base your analysis solely on the provided video content.",
+        "Deliver actionable insights to improve the identified segments for short-form optimization.",
+    ],
+)
+
+# Upload the video, then poll every 2 seconds while the service reports PROCESSING
+video_file = upload_file(video_path)
+while video_file.state.name == "PROCESSING":
+    time.sleep(2)
+    video_file = get_file(video_file.name)
+if video_file.state.name == "FAILED":  # stop here rather than analyze an unusable upload
+    raise RuntimeError(f"Processing of uploaded video {video_path} failed")
+
+# Multimodal Query for Video Analysis
+query = """
+You are an expert in video content creation, specializing in crafting engaging short-form content for platforms like YouTube Shorts and Instagram Reels. Your task is to analyze the provided video and identify segments that maximize viewer engagement.
+
+For each video, you'll:
+
+1. Identify key moments that will capture viewers' attention, focusing on:
+   - High-energy sequences
+   - Emotional peaks
+   - Surprising or unexpected moments
+   - Strong visual and audio elements
+   - Clear narrative segments with compelling storytelling
+
+2. Extract segments that work best for short-form content, considering:
+   - Optimal length (strictly 15–60 seconds)
+   - Natural start and end points that ensure smooth transitions
+   - Engaging pacing that maintains viewer attention
+   - Audio-visual harmony for an immersive experience
+   - Vertical format compatibility and adjustments if necessary
+
+3. Provide a detailed analysis of each segment, including:
+   - Precise timestamps (Start Time | End Time in MM:SS format)
+   - A clear description of why the segment would be engaging
+   - Suggestions on how to enhance the segment for short-form content
+   - An importance score (1-10) based on engagement potential
+
+Your goal is to identify moments that are visually compelling, emotionally engaging, and perfectly optimized for short-form platforms.
+"""
+
+# Generate the video analysis, then make sure the directory for the clips exists
+response = agent.run(query, videos=[Video(content=video_file)])
+output_dir = Path(output_dir)
+output_dir.mkdir(parents=True, exist_ok=True)
+
+# Extract and cut video segments
+def extract_segments(response_text):
+    """Cut each qualifying row of the analysis table into its own clip.
+
+    Scans ``str(response_text)`` for rows shaped like
+    ``| start | end | description | score |`` with minutes:seconds times.
+    A row qualifies when it spans 15-60 seconds and scores above 7; ffmpeg
+    then writes it to ``output_dir`` as ``short_<n>.mp4`` at 1080x1920.
+
+    Returns a list of ``{"path", "description", "score"}`` dicts, one per
+    clip that ffmpeg finished without error.
+    """
+    import re
+
+    row_re = re.compile(
+        r"\|\s*(\d+:\d+)\s*\|\s*(\d+:\d+)\s*\|\s*(.*?)\s*\|\s*(\d+)\s*\|"
+    )
+
+    def to_seconds(stamp):
+        # zip() pairs the weights 60 and 1 with the minutes and seconds fields.
+        return sum(w * int(part) for w, part in zip([60, 1], stamp.split(":")))
+
+    segments: list[dict] = []
+    for row in row_re.finditer(str(response_text)):
+        start_time, end_time, description, raw_score = row.groups()
+        score = int(raw_score)
+        start_seconds = to_seconds(start_time)
+        duration = to_seconds(end_time) - start_seconds
+        if not (15 <= duration <= 60) or score <= 7:
+            continue  # outside the length window, or not scored highly enough
+        # Numbered by successful clips so far, so a failed cut frees its slot.
+        output_path = output_dir / f"short_{len(segments) + 1}.mp4"
+        seek = ["-ss", str(start_seconds), "-i", video_path, "-t", str(duration)]
+        encode = [
+            "-vf", "scale=1080:1920,setsar=1:1",
+            "-c:v", "libx264", "-c:a", "aac",
+        ]
+        command = ["ffmpeg", *seek, *encode, "-y", str(output_path)]
+        try:
+            subprocess.run(command, check=True)
+        except subprocess.CalledProcessError:
+            print(f"Failed to process segment: {start_time} - {end_time}")
+            continue
+        clip = {"path": output_path, "description": description, "score": score}
+        segments.append(clip)
+
+    return segments
+
+logger.debug(f"{response.content}")  # raw model output, for troubleshooting
+# Cut the clips, then report every one that ffmpeg produced
+shorts = extract_segments(response.content)
+print("\n--- Generated Shorts ---")
+for short in shorts:
+    print(
+        f"Short at {short['path']}",
+        f"Description: {short['description']}",
+        f"Engagement Score: {short['score']}/10\n",
+        sep="\n",
+    )