Skip to content

Commit 2df0eb4

Browse files
authored
fix: improved server side cleanup after ingest (#477)
1 parent d1f8a80 commit 2df0eb4

File tree

4 files changed

+21
-126
lines changed

4 files changed

+21
-126
lines changed

src/server/main.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
19 19
from server.metrics_server import start_metrics_server
20 20
from server.routers import dynamic, index, ingest
21 21
from server.server_config import templates
22-
from server.server_utils import lifespan, limiter, rate_limit_exception_handler
22+
from server.server_utils import limiter, rate_limit_exception_handler
23 23

24 24
# Load environment variables from .env file
25 25
load_dotenv()
@@ -55,8 +55,8 @@
55 55
environment=sentry_environment,
56 56
)
57 57

58-
# Initialize the FastAPI application with lifespan
59-
app = FastAPI(lifespan=lifespan, docs_url=None, redoc_url=None)
58+
# Initialize the FastAPI application
59+
app = FastAPI(docs_url=None, redoc_url=None)
60 60
app.state.limiter = limiter
61 61

62 62
# Register the custom exception handler for rate limits

src/server/query_processor.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
2 2

3 3
from __future__ import annotations
4 4

5+
import shutil
5 6
from pathlib import Path
6 7
from typing import TYPE_CHECKING, cast
7 8

@@ -31,6 +32,17 @@
31 32
from gitingest.schemas.ingestion import IngestionQuery
32 33

33 34

35+
def _cleanup_repository(clone_config: CloneConfig) -> None:
36+
"""Clean up the cloned repository after processing."""
37+
try:
38+
local_path = Path(clone_config.local_path)
39+
if local_path.exists():
40+
shutil.rmtree(local_path)
41+
logger.info("Successfully cleaned up repository", extra={"local_path": str(local_path)})
42+
except (PermissionError, OSError):
43+
logger.exception("Could not delete repository", extra={"local_path": str(clone_config.local_path)})
44+
45+
34 46
async def _check_s3_cache(
35 47
query: IngestionQuery,
36 48
input_text: str,
@@ -292,6 +304,8 @@ async def process_query(
292 304
_store_digest_content(query, clone_config, digest_content, summary, tree, content)
293 305
except Exception as exc:
294 306
_print_error(query.url, exc, max_file_size, pattern_type, pattern)
307+
# Clean up repository even if processing failed
308+
_cleanup_repository(clone_config)
295 309
return IngestErrorResponse(error=str(exc))
296 310

297 311
if len(content) > MAX_DISPLAY_SIZE:
@@ -310,6 +324,9 @@ async def process_query(
310 324

311 325
digest_url = _generate_digest_url(query)
312 326

327+
# Clean up the repository after successful processing
328+
_cleanup_repository(clone_config)
329+
313 330
return IngestSuccessResponse(
314 331
repo_url=input_text,
315 332
short_repo_url=short_repo_url,

src/server/server_config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
7 7
from fastapi.templating import Jinja2Templates
8 8

9 9
MAX_DISPLAY_SIZE: int = 300_000
10-
DELETE_REPO_AFTER: int = 60 * 60 # In seconds (1 hour)
11 10

12 11
# Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js)
13 12
DEFAULT_FILE_SIZE_KB: int = 5 * 1024 # 5 mb

src/server/server_utils.py

Lines changed: 1 addition & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,12 @@
1 1
"""Utility functions for the server."""
2 2

3-
import asyncio
4-
import shutil
5-
import time
6-
from contextlib import asynccontextmanager, suppress
7-
from pathlib import Path
8-
from typing import AsyncGenerator
9-
10-
from fastapi import FastAPI, Request
3+
from fastapi import Request
11 4
from fastapi.responses import Response
12 5
from slowapi import Limiter, _rate_limit_exceeded_handler
13 6
from slowapi.errors import RateLimitExceeded
14 7
from slowapi.util import get_remote_address
15 8

16-
from gitingest.config import TMP_BASE_PATH
17 9
from gitingest.utils.logging_config import get_logger
18-
from server.server_config import DELETE_REPO_AFTER
19 10

20 11
# Initialize logger for this module
21 12
logger = get_logger(__name__)
@@ -52,118 +43,6 @@ async def rate_limit_exception_handler(request: Request, exc: Exception) -> Resp
52 43
raise exc
53 44

54 45

55-
@asynccontextmanager
56-
async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]:
57-
"""Manage startup & graceful-shutdown tasks for the FastAPI app.
58-
59-
Returns
60-
-------
61-
AsyncGenerator[None, None]
62-
Yields control back to the FastAPI application while the background task runs.
63-
64-
"""
65-
task = asyncio.create_task(_remove_old_repositories())
66-
67-
yield # app runs while the background task is alive
68-
69-
task.cancel() # ask the worker to stop
70-
with suppress(asyncio.CancelledError):
71-
await task # swallow the cancellation signal
72-
73-
74-
async def _remove_old_repositories(
75-
base_path: Path = TMP_BASE_PATH,
76-
scan_interval: int = 60,
77-
delete_after: int = DELETE_REPO_AFTER,
78-
) -> None:
79-
"""Periodically delete old repositories/directories.
80-
81-
Every ``scan_interval`` seconds the coroutine scans ``base_path`` and deletes directories older than
82-
``delete_after`` seconds. The repository URL is extracted from the first ``.txt`` file in each directory
83-
and appended to ``history.txt``, assuming the filename format: "owner-repository.txt". Filesystem errors are
84-
logged and the loop continues.
85-
86-
Parameters
87-
----------
88-
base_path : Path
89-
The path to the base directory where repositories are stored (default: ``TMP_BASE_PATH``).
90-
scan_interval : int
91-
The number of seconds between scans (default: 60).
92-
delete_after : int
93-
The number of seconds after which a repository is considered old and will be deleted
94-
(default: ``DELETE_REPO_AFTER``).
95-
96-
"""
97-
while True:
98-
if not base_path.exists():
99-
await asyncio.sleep(scan_interval)
100-
continue
101-
102-
now = time.time()
103-
try:
104-
for folder in base_path.iterdir():
105-
if now - folder.stat().st_ctime <= delete_after: # Not old enough
106-
continue
107-
108-
await _process_folder(folder)
109-
110-
except (OSError, PermissionError):
111-
logger.exception("Error in repository cleanup", extra={"base_path": str(base_path)})
112-
113-
await asyncio.sleep(scan_interval)
114-
115-
116-
async def _process_folder(folder: Path) -> None:
117-
"""Append the repo URL (if discoverable) to ``history.txt`` and delete ``folder``.
118-
119-
Parameters
120-
----------
121-
folder : Path
122-
The path to the folder to be processed.
123-
124-
"""
125-
history_file = Path("history.txt")
126-
loop = asyncio.get_running_loop()
127-
128-
try:
129-
first_txt_file = next(folder.glob("*.txt"))
130-
except StopIteration: # No .txt file found
131-
return
132-
133-
# Append owner/repo to history.txt
134-
try:
135-
filename = first_txt_file.stem # "owner-repo"
136-
if "-" in filename:
137-
owner, repo = filename.split("-", 1)
138-
repo_url = f"{owner}/{repo}"
139-
await loop.run_in_executor(None, _append_line, history_file, repo_url)
140-
except (OSError, PermissionError):
141-
logger.exception("Error logging repository URL", extra={"folder": str(folder)})
142-
143-
# Delete the cloned repo
144-
try:
145-
await loop.run_in_executor(None, shutil.rmtree, folder)
146-
except PermissionError:
147-
logger.exception("No permission to delete folder", extra={"folder": str(folder)})
148-
except OSError:
149-
logger.exception("Could not delete folder", extra={"folder": str(folder)})
150-
151-
152-
def _append_line(path: Path, line: str) -> None:
153-
"""Append a line to a file.
154-
155-
Parameters
156-
----------
157-
path : Path
158-
The path to the file to append the line to.
159-
line : str
160-
The line to append to the file.
161-
162-
"""
163-
with path.open("a", encoding="utf-8") as fp:
164-
fp.write(f"{line}\n")
165-
166-
167 46
## Color printing utility
168 47
class Colors:
169 48
"""ANSI color codes."""

0 commit comments

Comments
 (0)