Skip to content

Commit d963fb7

Browse files
committed
Configure json logger for the whole gitingest module
1 parent 998cea1 commit d963fb7

14 files changed

+92
-58
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ repos:
122122
pytest-asyncio,
123123
pytest-mock,
124124
python-dotenv,
125+
python-json-logger,
125126
'sentry-sdk[fastapi]',
126127
slowapi,
127128
starlette>=0.40.0,

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ dependencies = [
1010
"pathspec>=0.12.1",
1111
"pydantic",
1212
"python-dotenv",
13+
"python-json-logger",
1314
"starlette>=0.40.0", # Minimum safe release (https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw)
1415
"strenum; python_version < '3.11'",
1516
"tiktoken>=0.7.0", # Support for o200k_base encoding

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ pathspec>=0.12.1
55
prometheus-client
66
pydantic
77
python-dotenv
8+
python-json-logger
89
sentry-sdk[fastapi]
910
slowapi
1011
starlette>=0.40.0 # Vulnerable to https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw

src/gitingest/__main__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,19 @@
44
from __future__ import annotations
55

66
import asyncio
7+
import logging
78
from typing import TypedDict
89

910
import click
1011
from typing_extensions import Unpack
1112

1213
from gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_NAME
1314
from gitingest.entrypoint import ingest_async
15+
from gitingest.logging_config import setup_json_logging
16+
17+
setup_json_logging()
18+
19+
logger = logging.getLogger(__name__)
1420

1521

1622
class _CLIArgs(TypedDict):

src/gitingest/entrypoint.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44

55
import asyncio
66
import errno
7+
import logging
78
import shutil
89
import stat
910
import sys
10-
import warnings
1111
from contextlib import asynccontextmanager
1212
from pathlib import Path
1313
from typing import TYPE_CHECKING, AsyncGenerator, Callable
@@ -28,6 +28,8 @@
2828

2929
from gitingest.schemas import IngestionQuery
3030

31+
logger = logging.getLogger(__name__)
32+
3133

3234
async def ingest_async(
3335
source: str,
@@ -209,19 +211,19 @@ def _override_branch_and_tag(query: IngestionQuery, branch: str | None, tag: str
209211
"""
210212
if tag and query.tag and tag != query.tag:
211213
msg = f"Warning: The specified tag '{tag}' overrides the tag found in the URL '{query.tag}'."
212-
warnings.warn(msg, RuntimeWarning, stacklevel=3)
214+
logger.warning(msg)
213215

214216
query.tag = tag or query.tag
215217

216218
if branch and query.branch and branch != query.branch:
217219
msg = f"Warning: The specified branch '{branch}' overrides the branch found in the URL '{query.branch}'."
218-
warnings.warn(msg, RuntimeWarning, stacklevel=3)
220+
logger.warning(msg)
219221

220222
query.branch = branch or query.branch
221223

222224
if tag and branch:
223225
msg = "Warning: Both tag and branch are specified. The tag will be used."
224-
warnings.warn(msg, RuntimeWarning, stacklevel=3)
226+
logger.warning(msg)
225227

226228
# Tag wins over branch if both supplied
227229
if query.tag:

src/gitingest/ingestion.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import logging
56
from pathlib import Path
67
from typing import TYPE_CHECKING
78

@@ -13,6 +14,8 @@
1314
if TYPE_CHECKING:
1415
from gitingest.schemas import IngestionQuery
1516

17+
logger = logging.getLogger(__name__)
18+
1619

1720
def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
1821
"""Run the ingestion process for a parsed query.
@@ -111,7 +114,7 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
111114
_process_symlink(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
112115
elif sub_path.is_file():
113116
if sub_path.stat().st_size > query.max_file_size:
114-
print(f"Skipping file {sub_path}: would exceed max file size limit")
117+
logger.info("Skipping file %s: would exceed max file size limit", sub_path)
115118
continue
116119
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
117120
elif sub_path.is_dir():
@@ -133,7 +136,7 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
133136
node.file_count += child_directory_node.file_count
134137
node.dir_count += 1 + child_directory_node.dir_count
135138
else:
136-
print(f"Warning: {sub_path} is an unknown file type, skipping")
139+
logger.warning("Warning: %s is an unknown file type, skipping", sub_path)
137140

138141
node.sort_children()
139142

@@ -186,12 +189,12 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
186189
187190
"""
188191
if stats.total_files + 1 > MAX_FILES:
189-
print(f"Maximum file limit ({MAX_FILES}) reached")
192+
logger.warning("Maximum file limit (%i) reached", MAX_FILES)
190193
return
191194

192195
file_size = path.stat().st_size
193196
if stats.total_size + file_size > MAX_TOTAL_SIZE_BYTES:
194-
print(f"Skipping file {path}: would exceed total size limit")
197+
logger.info("Skipping file %s: would exceed total size limit", path)
195198
return
196199

197200
stats.total_files += 1
@@ -232,15 +235,15 @@ def limit_exceeded(stats: FileSystemStats, depth: int) -> bool:
232235
233236
"""
234237
if depth > MAX_DIRECTORY_DEPTH:
235-
print(f"Maximum depth limit ({MAX_DIRECTORY_DEPTH}) reached")
238+
logger.warning("Maximum depth limit (%i) reached", MAX_DIRECTORY_DEPTH)
236239
return True
237240

238241
if stats.total_files >= MAX_FILES:
239-
print(f"Maximum file limit ({MAX_FILES}) reached")
242+
logger.warning("Maximum file limit (%i) reached", MAX_FILES)
240243
return True # TODO: end recursion
241244

242245
if stats.total_size >= MAX_TOTAL_SIZE_BYTES:
243-
print(f"Maxumum total size limit ({MAX_TOTAL_SIZE_BYTES / 1024 / 1024:.1f}MB) reached")
246+
logger.warning("Maximum total size limit (%.1fMB) reached", MAX_TOTAL_SIZE_BYTES / 1024 / 1024)
244247
return True # TODO: end recursion
245248

246249
return False

src/gitingest/logging_config.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
"""Global logger configuration."""
2+
3+
import logging
4+
from typing import Literal
5+
6+
from pythonjsonlogger import jsonlogger
7+
8+
9+
def setup_json_logging(level: int = logging.INFO) -> None:
    """Configure JSON logging for the whole package this module belongs to.

    Installs a single stream handler with a JSON formatter on the top-level
    package logger, so loggers created with ``logging.getLogger(__name__)``
    in the package's other modules propagate their records to it.

    Parameters
    ----------
    level : int
        Minimum severity handled by the package logger (default: ``logging.INFO``).

    """
    # ``__name__`` is this module's own dotted path. Loggers of the other
    # modules in the package are siblings of it, not children, so a handler
    # attached here would never see their records. Use the first path
    # component (the package name) so the whole hierarchy is covered.
    package_name = __name__.partition(".")[0]
    logger = logging.getLogger(package_name)
    logger.setLevel(level)

    log_handler = logging.StreamHandler()
    log_handler.setFormatter(
        jsonlogger.JsonFormatter("%(asctime)s %(levelname)s %(name)s %(message)s"),
    )

    # Replace rather than append, so repeated calls do not stack duplicate
    # handlers and emit every record several times.
    logger.handlers = [log_handler]

src/gitingest/output_formatter.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import logging
56
from typing import TYPE_CHECKING
67

78
import tiktoken
@@ -12,6 +13,8 @@
1213
if TYPE_CHECKING:
1314
from gitingest.schemas import IngestionQuery
1415

16+
logger = logging.getLogger(__name__)
17+
1518
_TOKEN_THRESHOLDS: list[tuple[int, str]] = [
1619
(1_000_000, "M"),
1720
(1_000, "k"),
@@ -189,8 +192,8 @@ def _format_token_count(text: str) -> str | None:
189192
try:
190193
encoding = tiktoken.get_encoding("o200k_base") # gpt-4o, gpt-4o-mini
191194
total_tokens = len(encoding.encode(text, disallowed_special=()))
192-
except (ValueError, UnicodeEncodeError) as exc:
193-
print(exc)
195+
except (ValueError, UnicodeEncodeError):
196+
logger.exception("Failed to compute token count")
194197
return None
195198

196199
for threshold, suffix in _TOKEN_THRESHOLDS:

src/gitingest/query_parser.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
from __future__ import annotations
44

5+
import logging
56
import uuid
6-
import warnings
77
from pathlib import Path
88
from typing import Literal
99

@@ -18,6 +18,8 @@
1818
_normalise_source,
1919
)
2020

21+
logger = logging.getLogger(__name__)
22+
2123

2224
async def parse_remote_repo(source: str, token: str | None = None) -> IngestionQuery:
2325
"""Parse a repository URL and return an ``IngestionQuery`` object.
@@ -71,16 +73,19 @@ async def parse_remote_repo(source: str, token: str | None = None) -> IngestionQ
7173
# TODO: Handle issues and pull requests
7274
if query.type in {PathKind.ISSUES, PathKind.PULL}:
7375
msg = f"Warning: Issues and pull requests are not yet supported: {url}. Returning repository root."
76+
logger.warning(msg)
7477
return await _fallback_to_root(query, token=token, warn_msg=msg)
7578

7679
# If no extra path parts, just return
7780
if not path_parts:
7881
msg = f"Warning: No extra path parts: {url}. Returning repository root."
82+
logger.warning(msg)
7983
return await _fallback_to_root(query, token=token, warn_msg=msg)
8084

8185
if query.type not in {PathKind.TREE, PathKind.BLOB}:
8286
# TODO: Handle other types
8387
msg = f"Warning: Type '{query.type}' is not yet supported: {url}. Returning repository root."
88+
logger.warning(msg)
8489
return await _fallback_to_root(query, token=token, warn_msg=msg)
8590

8691
# Commit, branch, or tag
@@ -169,7 +174,7 @@ async def _configure_branch_or_tag(
169174
except RuntimeError as exc:
170175
# If remote discovery fails, we optimistically treat the first path segment as the branch/tag.
171176
msg = f"Warning: Failed to fetch {_ref_type}: {exc}"
172-
warnings.warn(msg, RuntimeWarning, stacklevel=2)
177+
logger.warning(msg)
173178
return path_parts.pop(0) if path_parts else None
174179

175180
# Iterate over the path components and try to find a matching branch/tag

src/gitingest/utils/git_utils.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import asyncio
66
import base64
7+
import logging
78
import re
89
import sys
910
from pathlib import Path
@@ -15,11 +16,12 @@
1516

1617
from gitingest.utils.compat_func import removesuffix
1718
from gitingest.utils.exceptions import InvalidGitHubTokenError
18-
from server.server_utils import Colors
1919

2020
if TYPE_CHECKING:
2121
from gitingest.schemas import CloneConfig
2222

23+
logger = logging.getLogger(__name__)
24+
2325
# GitHub Personal-Access tokens (classic + fine-grained).
2426
# - ghp_ / gho_ / ghu_ / ghs_ / ghr_ → 36 alphanumerics
2527
# - github_pat_ → 22 alphanumerics + "_" + 59 alphanumerics
@@ -97,13 +99,10 @@ async def ensure_git_installed() -> None:
9799
try:
98100
stdout, _ = await run_command("git", "config", "core.longpaths")
99101
if stdout.decode().strip().lower() != "true":
100-
print(
101-
f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}Git clone may fail on Windows "
102-
f"due to long file paths:{Colors.END}",
103-
)
104-
print(f"{Colors.RED}To avoid this issue, consider enabling long path support with:{Colors.END}")
105-
print(f"{Colors.RED} git config --global core.longpaths true{Colors.END}")
106-
print(f"{Colors.RED}Note: This command may require administrator privileges.{Colors.END}")
102+
logger.warning("WARN: Git clone may fail on Windows due to long file paths:")
103+
logger.warning("To avoid this issue, consider enabling long path support with:")
104+
logger.warning(" git config --global core.longpaths true")
105+
logger.warning("Note: This command may require administrator privileges.")
107106
except RuntimeError:
108107
# Ignore if checking 'core.longpaths' fails.
109108
pass

0 commit comments

Comments
 (0)