codefortulsa
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 27 additions & 5 deletions b/‎README.md‎
Lines changed: 27 additions & 5 deletions
diff --git a/‎data/meetings.csv‎
Lines changed: 0 additions & 7153 deletions b/‎data/meetings.csv‎
Lines changed: 0 additions & 7153 deletions
diff --git a/‎flows/__init__.py‎ b/‎flows/__init__.py‎
diff --git a/‎flows/translate_meetings.py‎
Lines changed: 17 additions & 0 deletions b/‎flows/translate_meetings.py‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎NOTEBOOK_GUIDELINES.md‎ renamed to ‎notebook_precommit.md‎ b/‎NOTEBOOK_GUIDELINES.md‎ renamed to ‎notebook_precommit.md‎
diff --git a/‎notebooks/meetings.ipynb‎
Lines changed: 367 additions & 42 deletions b/‎notebooks/meetings.ipynb‎
Lines changed: 367 additions & 42 deletions
diff --git a/‎notebooks/vtt_subtitles.ipynb‎
Lines changed: 3 additions & 3 deletions b/‎notebooks/vtt_subtitles.ipynb‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 8 additions & 5 deletions b/‎pyproject.toml‎
Lines changed: 8 additions & 5 deletions
diff --git a/‎scripts/download_m3u8.py‎
Lines changed: 0 additions & 176 deletions b/‎scripts/download_m3u8.py‎
Lines changed: 0 additions & 176 deletions
@@ -1,5 +1,6 @@
 # Environment variables
 .env
+.envrc
 
 data/
 models/
@@ -16,6 +17,7 @@ __pycache__/
 *.py[cod]
 *.so
 .Python
+.python-version
 build/
 develop-eggs/
 dist/
 
@@ -14,17 +14,35 @@ poetry install --no-root
 poetry self add poetry-plugin-shell
 poetry shell
 
-# Install Jupyter kernel for this environment (needed for Jupyter notebooks)
-python -m ipykernel install --user --name=tgov-scraper --display-name="TGOV Scraper"
+# Set up pre-commit hooks
+poetry run pre-commit install
+
+# Verify pre-commit hooks are working
+poetry run pre-commit run --all-files
+
+# See notebook_precommit.md for more details on how notebook outputs are automatically stripped
 ```
 
 ## Running
+### Jupyter notebooks
 
 ```bash
+# Install Jupyter kernel for this environment (needed for Jupyter notebooks)
+python -m ipykernel install --user --name=tgov-scraper --display-name="TGOV Scraper"
+
 jupyter notebook
 ```
 
-## Running Tests
+### Prefect flows
+See https://docs.prefect.io/get-started
+
+```bash
+prefect server start                      # to start the persistent server
+
+python -m flows.translate_meetings        # to run a specific flow
+```
+
+### Tests
 
 ```bash
 # Run all tests
@@ -39,12 +57,16 @@ pytest -v
 
 ## Project Structure
 
+- `data/`: local data artifacts
+- `flows/`: prefect flows
+- `notebooks/`: Jupyter notebooks for analysis and exploration
+- `scripts/`: one-off scripts for downloading, conversions, etc.
 - `src/`: Source code for the scraper
   - `models/`: Pydantic models for data representation
-- 'scripts`: one off scripts for downloading, conversions, etc
+- `tasks/`: prefect tasks
 - `tests/`: Test files
 - `notebooks/`: Jupyter notebooks for analysis and exploration
-- `data/`: output from notebooks 
+- `data/`: output from notebooks
 
 
 ## Running the transcription scripts
 
@@ -0,0 +1,17 @@
+from prefect import flow
+
+from tasks.meetings import create_meetings_csv
+
+
+@flow(log_prints=True)
+async def translate_meetings():
+    """Run the meeting translation pipeline as a Prefect flow.
+
+    Only the first stage is wired up so far: it awaits
+    ``create_meetings_csv`` from ``tasks.meetings``. The remaining stages
+    are listed below as TODOs and are not executed yet.
+    """
+    await create_meetings_csv()
+    # TODO: await download_videos()
+    # TODO: await transcribe_videos()
+    # TODO: await diarize_transcriptions()
+    # TODO: await translate_transcriptions()
+    # TODO: await create_subtitled_video_pages()
+
+if __name__ == "__main__":
+    # Allow running this flow directly, e.g. `python -m flows.translate_meetings`.
+    import asyncio
+    asyncio.run(translate_meetings())
@@ -70,17 +70,17 @@
     "\n",
     "# Import from the new subtitles module\n",
     "from src.subtitles import create_track, load_transcript\n",
-    "from src.models.subtitles import TrackFormat\n",
+    "from src.models.subtitles import SubtitleTrack\n",
     "\n",
     "# Path to the transcript file\n",
     "transcript_file = Path(\n",
     "    \"../data/transcripts/regular_council_meeting___2025_02_26.diarized.json\"\n",
     ")\n",
     "\n",
     "# Create VTT track\n",
-    "vtt_track = create_track(\n",
+    "vtt_track: SubtitleTrack = create_track(\n",
     "    transcript_data=transcript_file,\n",
-    "    track_format='vtt',\n",
+    "    format='vtt',\n",
     "    max_duration=5.0,\n",
     "    include_speaker_prefix=False,\n",
     ")\n",
 
@@ -2,11 +2,11 @@
 name = "tgov scraper"
 version = "0.1.0"
 description = "A set of scripts and notebooks for exploring Tulsa Government Access Television"
-authors = ["jdungan <johnadungan@gmail.com>"]
+authors = ["jdungan <johnadungan@gmail.com>", "groovecoder <luke@groovecoder.com>"]
 readme = "README.md"
 
 [tool.poetry.dependencies]
-python = "3.11.*"
+python = ">=3.11,<3.13"
 selectolax = "^0.3.28"
 aiohttp = "^3.11.13"
 pytest-asyncio = "^0.25.3"
@@ -25,14 +25,17 @@ jupyter-nbextensions-configurator = "^0.6.4"
 python-dotenv = "^1.0.1"
 aiofiles = "^24.1.0"
 faster-whisper = "^1.1.1"
+prefect = "^3.3.0"
+boto3 = "^1.37.24"
 
 
 [tool.poetry.group.dev.dependencies]
-jupyter = "^1.1.1"
+ipdb = "^0.13.13"
 ipykernel = "^6.29.5"
-pytest = "^8.0.0"
-pre-commit = "^4.2.0"
+jupyter = "^1.1.1"
 nbstripout = "^0.8.1"
+pre-commit = "^4.2.0"
+pytest = "^8.0.0"
 
 [build-system]
 requires = ["poetry-core"]