Skip to content

Commit 20061d3

Browse files
committed
feat(logging): implement loguru + add json logging format
1 parent efe5a26 commit 20061d3

18 files changed

+369
-103
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ repos:
117117
click>=8.0.0,
118118
'fastapi[standard]>=0.109.1',
119119
httpx,
120+
loguru>=0.7.0,
120121
pathspec>=0.12.1,
121122
prometheus-client,
122123
pydantic,
@@ -143,6 +144,7 @@ repos:
143144
click>=8.0.0,
144145
'fastapi[standard]>=0.109.1',
145146
httpx,
147+
loguru>=0.7.0,
146148
pathspec>=0.12.1,
147149
prometheus-client,
148150
pydantic,

CONTRIBUTING.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@ If you ever get stuck, reach out on [Discord](https://discord.com/invite/zerRaGK
6565
9. **Run the local server** to sanity-check:
6666

6767
```bash
68-
cd src
69-
uvicorn server.main:app
68+
LOG_FORMAT=human python run_server.py
7069
```
7170

7271
Open [http://localhost:8000](http://localhost:8000) to confirm everything works.

Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,12 @@ RUN set -eux; \
3737

3838
COPY --from=python-builder --chown=$UID:$GID /usr/local/lib/python3.13/site-packages/ /usr/local/lib/python3.13/site-packages/
3939
COPY --chown=$UID:$GID src/ ./
40+
COPY --chown=$UID:$GID run_server.py ./
4041

4142
RUN set -eux; \
4243
chown -R appuser:appuser /app
4344
USER appuser
4445

4546
EXPOSE 8000
4647
EXPOSE 9090
47-
CMD ["python", "-m", "uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000"]
48+
CMD ["python", "run_server.py"]

compose.yml

Lines changed: 48 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,45 @@
1-
# Common base configuration for all services
1+
x-base-environment: &base-environment
2+
# Python Configuration
3+
PYTHONUNBUFFERED: "1"
4+
PYTHONDONTWRITEBYTECODE: "1"
5+
# Host Configuration
6+
ALLOWED_HOSTS: ${ALLOWED_HOSTS:-gitingest.com,*.gitingest.com,localhost,127.0.0.1}
7+
# Metrics Configuration
8+
GITINGEST_METRICS_ENABLED: ${GITINGEST_METRICS_ENABLED:-true}
9+
GITINGEST_METRICS_HOST: ${GITINGEST_METRICS_HOST:-0.0.0.0}
10+
GITINGEST_METRICS_PORT: ${GITINGEST_METRICS_PORT:-9090}
11+
# Sentry Configuration
12+
GITINGEST_SENTRY_ENABLED: ${GITINGEST_SENTRY_ENABLED:-false}
13+
GITINGEST_SENTRY_DSN: ${GITINGEST_SENTRY_DSN:-}
14+
GITINGEST_SENTRY_TRACES_SAMPLE_RATE: ${GITINGEST_SENTRY_TRACES_SAMPLE_RATE:-1.0}
15+
GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE: ${GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE:-1.0}
16+
GITINGEST_SENTRY_PROFILE_LIFECYCLE: ${GITINGEST_SENTRY_PROFILE_LIFECYCLE:-trace}
17+
GITINGEST_SENTRY_SEND_DEFAULT_PII: ${GITINGEST_SENTRY_SEND_DEFAULT_PII:-true}
18+
19+
x-prod-environment: &prod-environment
20+
GITINGEST_SENTRY_ENVIRONMENT: ${GITINGEST_SENTRY_ENVIRONMENT:-production}
21+
22+
x-dev-environment: &dev-environment
23+
DEBUG: "true"
24+
LOG_LEVEL: "debug"
25+
RELOAD: "true"
26+
GITINGEST_SENTRY_ENVIRONMENT: ${GITINGEST_SENTRY_ENVIRONMENT:-development}
27+
# S3 Configuration for development
28+
S3_ENABLED: "true"
29+
S3_ENDPOINT: http://minio:9000
30+
S3_ACCESS_KEY: ${S3_ACCESS_KEY:-gitingest}
31+
S3_SECRET_KEY: ${S3_SECRET_KEY:-gitingest123}
32+
S3_BUCKET_NAME: ${S3_BUCKET_NAME:-gitingest-bucket}
33+
S3_REGION: ${S3_REGION:-us-east-1}
34+
S3_DIRECTORY_PREFIX: ${S3_DIRECTORY_PREFIX:-dev}
35+
S3_ALIAS_HOST: ${S3_ALIAS_HOST:-http://127.0.0.1:9000/${S3_BUCKET_NAME:-gitingest-bucket}}
36+
237
x-app-base: &app-base
338
ports:
439
- "${APP_WEB_BIND:-8000}:8000" # Main application port
540
- "${GITINGEST_METRICS_HOST:-127.0.0.1}:${GITINGEST_METRICS_PORT:-9090}:9090" # Metrics port
6-
environment:
7-
# Python Configuration
8-
- PYTHONUNBUFFERED=1
9-
- PYTHONDONTWRITEBYTECODE=1
10-
# Host Configuration
11-
- ALLOWED_HOSTS=${ALLOWED_HOSTS:-gitingest.com,*.gitingest.com,localhost,127.0.0.1}
12-
# Metrics Configuration
13-
- GITINGEST_METRICS_ENABLED=${GITINGEST_METRICS_ENABLED:-true}
14-
- GITINGEST_METRICS_HOST=${GITINGEST_METRICS_HOST:-127.0.0.1}
15-
- GITINGEST_METRICS_PORT=${GITINGEST_METRICS_PORT:-9090}
16-
# Sentry Configuration
17-
- GITINGEST_SENTRY_ENABLED=${GITINGEST_SENTRY_ENABLED:-false}
18-
- GITINGEST_SENTRY_DSN=${GITINGEST_SENTRY_DSN:-}
19-
- GITINGEST_SENTRY_TRACES_SAMPLE_RATE=${GITINGEST_SENTRY_TRACES_SAMPLE_RATE:-1.0}
20-
- GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE=${GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE:-1.0}
21-
- GITINGEST_SENTRY_PROFILE_LIFECYCLE=${GITINGEST_SENTRY_PROFILE_LIFECYCLE:-trace}
22-
- GITINGEST_SENTRY_SEND_DEFAULT_PII=${GITINGEST_SENTRY_SEND_DEFAULT_PII:-true}
2341
user: "1000:1000"
24-
command: ["python", "-m", "uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000"]
42+
command: ["python", "run_server.py"]
2543

2644
services:
2745
# Production service configuration
@@ -31,7 +49,7 @@ services:
3149
profiles:
3250
- prod
3351
environment:
34-
- GITINGEST_SENTRY_ENVIRONMENT=${GITINGEST_SENTRY_ENVIRONMENT:-production}
52+
<<: [*base-environment, *prod-environment]
3553
restart: unless-stopped
3654

3755
# Development service configuration
@@ -43,24 +61,13 @@ services:
4361
profiles:
4462
- dev
4563
environment:
46-
- DEBUG=true
47-
- GITINGEST_SENTRY_ENVIRONMENT=${GITINGEST_SENTRY_ENVIRONMENT:-development}
48-
# S3 Configuration
49-
- S3_ENABLED=true
50-
- S3_ENDPOINT=http://minio:9000
51-
- S3_ACCESS_KEY=${S3_ACCESS_KEY:-gitingest}
52-
- S3_SECRET_KEY=${S3_SECRET_KEY:-gitingest123}
53-
# Use lowercase bucket name to ensure compatibility with MinIO
54-
- S3_BUCKET_NAME=${S3_BUCKET_NAME:-gitingest-bucket}
55-
- S3_REGION=${S3_REGION:-us-east-1}
56-
- S3_DIRECTORY_PREFIX=${S3_DIRECTORY_PREFIX:-dev}
57-
# Public URL for S3 resources
58-
- S3_ALIAS_HOST=${S3_ALIAS_HOST:-http://127.0.0.1:9000/${S3_BUCKET_NAME:-gitingest-bucket}}
64+
<<: [*base-environment, *dev-environment]
5965
volumes:
6066
# Mount source code for live development
6167
- ./src:/app:ro
68+
- ./run_server.py:/app/run_server.py:ro
6269
# Hot reloading is enabled via RELOAD=true (set in x-dev-environment); run_server.py passes it to uvicorn
63-
command: ["python", "-m", "uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
70+
command: ["python", "run_server.py"]
6471
depends_on:
6572
minio-setup:
6673
condition: service_completed_successfully
@@ -73,9 +80,9 @@ services:
7380
ports:
7481
- "9000:9000" # API port
7582
- "9001:9001" # Console port
76-
environment:
77-
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
78-
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin}
83+
environment: &minio-environment
84+
MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
85+
MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
7986
volumes:
8087
- minio-data:/data
8188
command: server /data --console-address ":9001"
@@ -96,11 +103,10 @@ services:
96103
minio:
97104
condition: service_healthy
98105
environment:
99-
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
100-
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin}
101-
- S3_ACCESS_KEY=${S3_ACCESS_KEY:-gitingest}
102-
- S3_SECRET_KEY=${S3_SECRET_KEY:-gitingest123}
103-
- S3_BUCKET_NAME=${S3_BUCKET_NAME:-gitingest-bucket}
106+
<<: *minio-environment
107+
S3_ACCESS_KEY: ${S3_ACCESS_KEY:-gitingest}
108+
S3_SECRET_KEY: ${S3_SECRET_KEY:-gitingest123}
109+
S3_BUCKET_NAME: ${S3_BUCKET_NAME:-gitingest-bucket}
104110
volumes:
105111
- ./.docker/minio/setup.sh:/setup.sh:ro
106112
entrypoint: sh

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ requires-python = ">= 3.8"
77
dependencies = [
88
"click>=8.0.0",
99
"httpx",
10+
"loguru>=0.7.0",
1011
"pathspec>=0.12.1",
1112
"pydantic",
1213
"python-dotenv",
@@ -96,7 +97,6 @@ ignore = [ # https://docs.astral.sh/ruff/rules/...
9697

9798
# TODO: fix the following issues:
9899
"TD003", # missing-todo-link, TODO: add issue links
99-
"T201", # print, TODO: replace with logging
100100
"S108", # hardcoded-temp-file, TODO: replace with tempfile
101101
"BLE001", # blind-except, TODO: replace with specific exceptions
102102
"FAST003", # fast-api-unused-path-parameter, TODO: fix

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ boto3>=1.28.0 # AWS SDK for S3 support
22
click>=8.0.0
33
fastapi[standard]>=0.109.1 # Versions below 0.109.1 are vulnerable to https://osv.dev/vulnerability/PYSEC-2024-38
44
httpx
5+
loguru>=0.7.0
56
pathspec>=0.12.1
67
prometheus-client
78
pydantic

run_server.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/usr/bin/env python3
2+
"""Custom server startup script to ensure logging is configured before uvicorn starts."""
3+
4+
import os
5+
6+
import uvicorn
7+
8+
# Import logging configuration first to intercept all logging
9+
from gitingest.utils.logging_config import get_logger
10+
11+
logger = get_logger(__name__)
12+
13+
if __name__ == "__main__":
14+
# Get configuration from environment variables
15+
host = os.getenv("HOST", "0.0.0.0") # noqa: S104
16+
port = int(os.getenv("PORT", "8000"))
17+
reload = os.getenv("RELOAD", "false").lower() == "true"
18+
19+
logger.info(
20+
"Starting Gitingest server",
21+
extra={
22+
"host": host,
23+
"port": port,
24+
},
25+
)
26+
27+
uvicorn.run(
28+
"server.main:app",
29+
host=host,
30+
port=port,
31+
reload=reload,
32+
log_config=None, # Disable uvicorn's default logging config
33+
)

src/gitingest/__main__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@
1212
from gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_NAME
1313
from gitingest.entrypoint import ingest_async
1414

15+
# Import logging configuration first to intercept all logging
16+
from gitingest.utils.logging_config import get_logger
17+
18+
# Initialize logger for this module
19+
logger = get_logger(__name__)
20+
1521

1622
class _CLIArgs(TypedDict):
1723
source: str

src/gitingest/ingestion.py

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,14 @@
99
from gitingest.output_formatter import format_node
1010
from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats
1111
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
12+
from gitingest.utils.logging_config import get_logger
1213

1314
if TYPE_CHECKING:
1415
from gitingest.schemas import IngestionQuery
1516

17+
# Initialize logger for this module
18+
logger = get_logger(__name__)
19+
1620

1721
def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
1822
"""Run the ingestion process for a parsed query.
@@ -111,7 +115,14 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
111115
_process_symlink(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
112116
elif sub_path.is_file():
113117
if sub_path.stat().st_size > query.max_file_size:
114-
print(f"Skipping file {sub_path}: would exceed max file size limit")
118+
logger.warning(
119+
"Skipping file: would exceed max file size limit",
120+
extra={
121+
"file_path": str(sub_path),
122+
"file_size": sub_path.stat().st_size,
123+
"max_file_size": query.max_file_size,
124+
},
125+
)
115126
continue
116127
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
117128
elif sub_path.is_dir():
@@ -133,7 +144,7 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
133144
node.file_count += child_directory_node.file_count
134145
node.dir_count += 1 + child_directory_node.dir_count
135146
else:
136-
print(f"Warning: {sub_path} is an unknown file type, skipping")
147+
logger.warning("Unknown file type, skipping", extra={"file_path": str(sub_path)})
137148

138149
node.sort_children()
139150

@@ -186,12 +197,27 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
186197
187198
"""
188199
if stats.total_files + 1 > MAX_FILES:
189-
print(f"Maximum file limit ({MAX_FILES}) reached")
200+
logger.warning(
201+
"Maximum file limit reached",
202+
extra={
203+
"current_files": stats.total_files,
204+
"max_files": MAX_FILES,
205+
"file_path": str(path),
206+
},
207+
)
190208
return
191209

192210
file_size = path.stat().st_size
193211
if stats.total_size + file_size > MAX_TOTAL_SIZE_BYTES:
194-
print(f"Skipping file {path}: would exceed total size limit")
212+
logger.warning(
213+
"Skipping file: would exceed total size limit",
214+
extra={
215+
"file_path": str(path),
216+
"file_size": file_size,
217+
"current_total_size": stats.total_size,
218+
"max_total_size": MAX_TOTAL_SIZE_BYTES,
219+
},
220+
)
195221
return
196222

197223
stats.total_files += 1
@@ -232,15 +258,33 @@ def limit_exceeded(stats: FileSystemStats, depth: int) -> bool:
232258
233259
"""
234260
if depth > MAX_DIRECTORY_DEPTH:
235-
print(f"Maximum depth limit ({MAX_DIRECTORY_DEPTH}) reached")
261+
logger.warning(
262+
"Maximum directory depth limit reached",
263+
extra={
264+
"current_depth": depth,
265+
"max_depth": MAX_DIRECTORY_DEPTH,
266+
},
267+
)
236268
return True
237269

238270
if stats.total_files >= MAX_FILES:
239-
print(f"Maximum file limit ({MAX_FILES}) reached")
271+
logger.warning(
272+
"Maximum file limit reached",
273+
extra={
274+
"current_files": stats.total_files,
275+
"max_files": MAX_FILES,
276+
},
277+
)
240278
return True # TODO: end recursion
241279

242280
if stats.total_size >= MAX_TOTAL_SIZE_BYTES:
243-
print(f"Maxumum total size limit ({MAX_TOTAL_SIZE_BYTES / 1024 / 1024:.1f}MB) reached")
281+
logger.warning(
282+
"Maximum total size limit reached",
283+
extra={
284+
"current_size_mb": stats.total_size / 1024 / 1024,
285+
"max_size_mb": MAX_TOTAL_SIZE_BYTES / 1024 / 1024,
286+
},
287+
)
244288
return True # TODO: end recursion
245289

246290
return False

src/gitingest/output_formatter.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,14 @@
88

99
from gitingest.schemas import FileSystemNode, FileSystemNodeType
1010
from gitingest.utils.compat_func import readlink
11+
from gitingest.utils.logging_config import get_logger
1112

1213
if TYPE_CHECKING:
1314
from gitingest.schemas import IngestionQuery
1415

16+
# Initialize logger for this module
17+
logger = get_logger(__name__)
18+
1519
_TOKEN_THRESHOLDS: list[tuple[int, str]] = [
1620
(1_000_000, "M"),
1721
(1_000, "k"),
@@ -189,8 +193,8 @@ def _format_token_count(text: str) -> str | None:
189193
try:
190194
encoding = tiktoken.get_encoding("o200k_base") # gpt-4o, gpt-4o-mini
191195
total_tokens = len(encoding.encode(text, disallowed_special=()))
192-
except (ValueError, UnicodeEncodeError) as exc:
193-
print(exc)
196+
except (ValueError, UnicodeEncodeError):
197+
logger.exception("Failed to encode text for token counting")
194198
return None
195199

196200
for threshold, suffix in _TOKEN_THRESHOLDS:

0 commit comments

Comments
 (0)