Skip to content

Commit 42f2bb0

Browse files
committed
feat(logging): implement loguru
1 parent efe5a26 commit 42f2bb0

File tree

11 files changed

+244
-57
lines changed

11 files changed

+244
-57
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ repos:
117117
click>=8.0.0,
118118
'fastapi[standard]>=0.109.1',
119119
httpx,
120+
loguru>=0.7.0,
120121
pathspec>=0.12.1,
121122
prometheus-client,
122123
pydantic,
@@ -143,6 +144,7 @@ repos:
143144
click>=8.0.0,
144145
'fastapi[standard]>=0.109.1',
145146
httpx,
147+
loguru>=0.7.0,
146148
pathspec>=0.12.1,
147149
prometheus-client,
148150
pydantic,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ requires-python = ">= 3.8"
77
dependencies = [
88
"click>=8.0.0",
99
"httpx",
10+
"loguru>=0.7.0",
1011
"pathspec>=0.12.1",
1112
"pydantic",
1213
"python-dotenv",
@@ -96,7 +97,6 @@ ignore = [ # https://docs.astral.sh/ruff/rules/...
9697

9798
# TODO: fix the following issues:
9899
"TD003", # missing-todo-link, TODO: add issue links
99-
"T201", # print, TODO: replace with logging
100100
"S108", # hardcoded-temp-file, TODO: replace with tempfile
101101
"BLE001", # blind-except, TODO: replace with specific exceptions
102102
"FAST003", # fast-api-unused-path-parameter, TODO: fix

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ boto3>=1.28.0 # AWS SDK for S3 support
22
click>=8.0.0
33
fastapi[standard]>=0.109.1 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2024-38
44
httpx
5+
loguru>=0.7.0
56
pathspec>=0.12.1
67
prometheus-client
78
pydantic

src/gitingest/ingestion.py

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,14 @@
99
from gitingest.output_formatter import format_node
1010
from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats
1111
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
12+
from gitingest.utils.logging_config import get_logger
1213

1314
if TYPE_CHECKING:
1415
from gitingest.schemas import IngestionQuery
1516

17+
# Initialize logger for this module
18+
logger = get_logger(__name__)
19+
1620

1721
def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
1822
"""Run the ingestion process for a parsed query.
@@ -111,7 +115,14 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
111115
_process_symlink(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
112116
elif sub_path.is_file():
113117
if sub_path.stat().st_size > query.max_file_size:
114-
print(f"Skipping file {sub_path}: would exceed max file size limit")
118+
logger.warning(
119+
"Skipping file: would exceed max file size limit",
120+
extra={
121+
"file_path": str(sub_path),
122+
"file_size": sub_path.stat().st_size,
123+
"max_file_size": query.max_file_size,
124+
},
125+
)
115126
continue
116127
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
117128
elif sub_path.is_dir():
@@ -133,7 +144,7 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
133144
node.file_count += child_directory_node.file_count
134145
node.dir_count += 1 + child_directory_node.dir_count
135146
else:
136-
print(f"Warning: {sub_path} is an unknown file type, skipping")
147+
logger.warning("Unknown file type, skipping", extra={"file_path": str(sub_path)})
137148

138149
node.sort_children()
139150

@@ -186,12 +197,27 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
186197
187198
"""
188199
if stats.total_files + 1 > MAX_FILES:
189-
print(f"Maximum file limit ({MAX_FILES}) reached")
200+
logger.warning(
201+
"Maximum file limit reached",
202+
extra={
203+
"current_files": stats.total_files,
204+
"max_files": MAX_FILES,
205+
"file_path": str(path),
206+
},
207+
)
190208
return
191209

192210
file_size = path.stat().st_size
193211
if stats.total_size + file_size > MAX_TOTAL_SIZE_BYTES:
194-
print(f"Skipping file {path}: would exceed total size limit")
212+
logger.warning(
213+
"Skipping file: would exceed total size limit",
214+
extra={
215+
"file_path": str(path),
216+
"file_size": file_size,
217+
"current_total_size": stats.total_size,
218+
"max_total_size": MAX_TOTAL_SIZE_BYTES,
219+
},
220+
)
195221
return
196222

197223
stats.total_files += 1
@@ -232,15 +258,33 @@ def limit_exceeded(stats: FileSystemStats, depth: int) -> bool:
232258
233259
"""
234260
if depth > MAX_DIRECTORY_DEPTH:
235-
print(f"Maximum depth limit ({MAX_DIRECTORY_DEPTH}) reached")
261+
logger.warning(
262+
"Maximum directory depth limit reached",
263+
extra={
264+
"current_depth": depth,
265+
"max_depth": MAX_DIRECTORY_DEPTH,
266+
},
267+
)
236268
return True
237269

238270
if stats.total_files >= MAX_FILES:
239-
print(f"Maximum file limit ({MAX_FILES}) reached")
271+
logger.warning(
272+
"Maximum file limit reached",
273+
extra={
274+
"current_files": stats.total_files,
275+
"max_files": MAX_FILES,
276+
},
277+
)
240278
return True # TODO: end recursion
241279

242280
if stats.total_size >= MAX_TOTAL_SIZE_BYTES:
243-
print(f"Maxumum total size limit ({MAX_TOTAL_SIZE_BYTES / 1024 / 1024:.1f}MB) reached")
281+
logger.warning(
282+
"Maximum total size limit reached",
283+
extra={
284+
"current_size_mb": stats.total_size / 1024 / 1024,
285+
"max_size_mb": MAX_TOTAL_SIZE_BYTES / 1024 / 1024,
286+
},
287+
)
244288
return True # TODO: end recursion
245289

246290
return False

src/gitingest/output_formatter.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,14 @@
88

99
from gitingest.schemas import FileSystemNode, FileSystemNodeType
1010
from gitingest.utils.compat_func import readlink
11+
from gitingest.utils.logging_config import get_logger
1112

1213
if TYPE_CHECKING:
1314
from gitingest.schemas import IngestionQuery
1415

16+
# Initialize logger for this module
17+
logger = get_logger(__name__)
18+
1519
_TOKEN_THRESHOLDS: list[tuple[int, str]] = [
1620
(1_000_000, "M"),
1721
(1_000, "k"),
@@ -189,8 +193,8 @@ def _format_token_count(text: str) -> str | None:
189193
try:
190194
encoding = tiktoken.get_encoding("o200k_base") # gpt-4o, gpt-4o-mini
191195
total_tokens = len(encoding.encode(text, disallowed_special=()))
192-
except (ValueError, UnicodeEncodeError) as exc:
193-
print(exc)
196+
except (ValueError, UnicodeEncodeError):
197+
logger.exception("Failed to encode text for token counting")
194198
return None
195199

196200
for threshold, suffix in _TOKEN_THRESHOLDS:

src/gitingest/utils/git_utils.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@
1515

1616
from gitingest.utils.compat_func import removesuffix
1717
from gitingest.utils.exceptions import InvalidGitHubTokenError
18-
from server.server_utils import Colors
18+
from gitingest.utils.logging_config import get_logger
1919

2020
if TYPE_CHECKING:
2121
from gitingest.schemas import CloneConfig
2222

23+
# Initialize logger for this module
24+
logger = get_logger(__name__)
25+
2326
# GitHub Personal-Access tokens (classic + fine-grained).
2427
# - ghp_ / gho_ / ghu_ / ghs_ / ghr_ → 36 alphanumerics
2528
# - github_pat_ → 22 alphanumerics + "_" + 59 alphanumerics
@@ -97,13 +100,12 @@ async def ensure_git_installed() -> None:
97100
try:
98101
stdout, _ = await run_command("git", "config", "core.longpaths")
99102
if stdout.decode().strip().lower() != "true":
100-
print(
101-
f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}Git clone may fail on Windows "
102-
f"due to long file paths:{Colors.END}",
103+
logger.warning(
104+
"Git clone may fail on Windows due to long file paths. "
105+
"Consider enabling long path support with: 'git config --global core.longpaths true'. "
106+
"Note: This command may require administrator privileges.",
107+
extra={"platform": "windows", "longpaths_enabled": False},
103108
)
104-
print(f"{Colors.RED}To avoid this issue, consider enabling long path support with:{Colors.END}")
105-
print(f"{Colors.RED} git config --global core.longpaths true{Colors.END}")
106-
print(f"{Colors.RED}Note: This command may require administrator privileges.{Colors.END}")
107109
except RuntimeError:
108110
# Ignore if checking 'core.longpaths' fails.
109111
pass

src/gitingest/utils/logging_config.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
"""Logging configuration for gitingest using loguru.
2+
3+
This module provides structured JSON logging suitable for Kubernetes deployments
4+
while also supporting human-readable logging for development.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import os
10+
import sys
11+
from typing import Any
12+
13+
from loguru import logger
14+
15+
16+
def serialize_record(record: dict[str, Any]) -> str:
    """Serialize a loguru log record to a single JSON document.

    Parameters
    ----------
    record : dict[str, Any]
        The loguru record dictionary (``time``, ``level``, ``name``,
        ``module``, ``function``, ``line``, ``message``, ``exception``,
        ``extra``).

    Returns
    -------
    str
        The record serialized as a JSON string (one line, no trailing
        newline), suitable for log collectors in Kubernetes deployments.

    """
    # Extract the relevant fields for structured logging.
    log_entry: dict[str, Any] = {
        "timestamp": record["time"].isoformat(),
        "level": record["level"].name,
        "logger": record["name"],
        "module": record["module"],
        "function": record["function"],
        "line": record["line"],
        "message": record["message"],
    }

    # Add exception info if present.  The raw traceback object is not
    # JSON-serializable, so only its presence is recorded; the formatted
    # traceback is emitted separately by the handler (backtrace=True).
    exception = record.get("exception")
    if exception is not None:
        log_entry["exception"] = {
            "type": exception.type.__name__ if exception.type is not None else None,
            "value": str(exception.value),
            "has_traceback": exception.traceback is not None,
        }

    # Loguru records are dicts, so the previous `hasattr(record, "extra")`
    # check never matched the "extra" *key*; use membership instead and
    # skip the field entirely when no extra context was bound.
    extra = record.get("extra")
    if extra:
        log_entry["extra"] = extra

    # json.dumps (not str()) so the output is valid JSON, as the module
    # docstring promises; default=str guards non-serializable extra values.
    return json.dumps(log_entry, default=str, ensure_ascii=False)
54+
55+
56+
def configure_logging() -> None:
    """Configure loguru handlers for the application.

    Emits JSON logs for production/Kubernetes environments (``LOG_FORMAT=json``,
    defaulted when ``KUBERNETES_SERVICE_HOST`` is set) or a colorized
    human-readable format for development.  The level comes from ``LOG_LEVEL``
    (default ``INFO``).
    """
    # Remove loguru's default stderr handler so we fully control output.
    logger.remove()

    # Kubernetes injects KUBERNETES_SERVICE_HOST into every pod.
    is_k8s = os.getenv("KUBERNETES_SERVICE_HOST") is not None
    log_format = os.getenv("LOG_FORMAT", "json" if is_k8s else "human")
    log_level = os.getenv("LOG_LEVEL", "INFO")

    if log_format.lower() == "json":

        def _json_formatter(record: dict[str, Any]) -> str:
            # Loguru treats the return value of a `format` callable as a
            # format *template*, not the final message.  Passing
            # serialize_record directly (as before) would let braces inside
            # log messages be re-interpreted by the formatter (raising
            # KeyError/ValueError) and would drop the trailing newline.
            # The documented pattern is to stash the serialized payload in
            # `extra` and reference it from a static template.
            record["extra"]["serialized"] = serialize_record(record)
            return "{extra[serialized]}\n"

        # JSON format for structured logging (Kubernetes/production).
        logger.add(
            sys.stdout,
            format=_json_formatter,
            level=log_level,
            enqueue=True,  # async logging for better performance
            diagnose=False,  # don't leak variable values in tracebacks (security)
            backtrace=True,  # include full traceback
        )
    else:
        # Human-readable, colorized format for development.
        logger.add(
            sys.stdout,
            format="<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | "
            "<level>{level: <8}</level> | "
            "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | "
            "<level>{message}</level>",
            level=log_level,
            enqueue=True,
            diagnose=True,  # include variable values in development
            backtrace=True,
        )
93+
94+
95+
def get_logger(name: str | None = None) -> logger.__class__:
    """Return a logger for the given module.

    Parameters
    ----------
    name : str | None, optional
        Logger name to bind into the record's ``extra`` context; when
        omitted, the shared loguru logger is returned unchanged.

    Returns
    -------
    logger.__class__
        A loguru logger instance.

    """
    # NOTE(review): bind() stores `name` in record["extra"], it does not
    # replace loguru's own record["name"] — confirm downstream consumers
    # expect that.
    return logger.bind(name=name) if name else logger
112+
# Initialize logging when module is imported.
# NOTE(review): import-time side effect — any module importing this package
# reconfigures the global loguru handlers; confirm repeated imports /
# application-level logger.add() calls behave as intended.
configure_logging()

src/server/metrics_server.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
"""Prometheus metrics server running on a separate port."""
22

3-
import logging
4-
53
import uvicorn
64
from fastapi import FastAPI
75
from fastapi.responses import HTMLResponse
86
from prometheus_client import REGISTRY, generate_latest
97

8+
from gitingest.utils.logging_config import get_logger
9+
1010
# Create a logger for this module
11-
logger = logging.getLogger(__name__)
11+
logger = get_logger(__name__)
1212

1313
# Create a separate FastAPI app for metrics
1414
metrics_app = FastAPI(

0 commit comments

Comments
 (0)