Skip to content

Commit 6b75463

Browse files
committed
Add LOG_FORMAT env, add logging for the backend
1 parent 0a7d977 commit 6b75463

File tree

12 files changed

+149
-149
lines changed

12 files changed

+149
-149
lines changed

.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,4 @@ S3_REGION=us-east-1
5656
S3_ALIAS_HOST=127.0.0.1:9000/gitingest-bucket
5757
# Optional prefix for S3 file paths (if set, prefixes all S3 paths with this value)
5858
# S3_DIRECTORY_PREFIX=my-prefix
59+
LOG_FORMAT=JSON

src/gitingest/__main__.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212

1313
from gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_NAME
1414
from gitingest.entrypoint import ingest_async
15-
from gitingest.logging_config import setup_json_logging
15+
from gitingest.logging_config import setup_logging
1616

17-
setup_json_logging()
17+
setup_logging()
1818

1919
logger = logging.getLogger(__name__)
2020

@@ -169,9 +169,9 @@ async def _async_main(
169169
output_target = output if output is not None else OUTPUT_FILE_NAME
170170

171171
if output_target == "-":
172-
click.echo("Analyzing source, preparing output for stdout...", err=True)
172+
logger.debug("Analyzing source, preparing output for stdout...")
173173
else:
174-
click.echo(f"Analyzing source, output will be written to '{output_target}'...", err=True)
174+
logger.debug("Analyzing source, output will be written to '%s'...", output_target)
175175

176176
summary, _, _ = await ingest_async(
177177
source,
@@ -186,18 +186,18 @@ async def _async_main(
186186
)
187187
except Exception as exc:
188188
# Convert any exception into Click.Abort so that exit status is non-zero
189-
click.echo(f"Error: {exc}", err=True)
189+
logger.exception("Ingest failed.", exc_info=exc)
190190
raise click.Abort from exc
191191

192192
if output_target == "-": # stdout
193-
click.echo("\n--- Summary ---", err=True)
194-
click.echo(summary, err=True)
195-
click.echo("--- End Summary ---", err=True)
196-
click.echo("Analysis complete! Output sent to stdout.", err=True)
193+
logger.info("--- Summary ---")
194+
logger.info(summary)
195+
logger.info("--- End Summary ---")
196+
logger.info("Analysis complete! Output sent to stdout.")
197197
else: # file
198-
click.echo(f"Analysis complete! Output written to: {output_target}")
199-
click.echo("\nSummary:")
200-
click.echo(summary)
198+
logger.info("Analysis complete! Output written to: %s", output_target)
199+
logger.info("Summary:")
200+
logger.info(summary)
201201

202202

203203
if __name__ == "__main__":

src/gitingest/entrypoint.py

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -302,22 +302,17 @@ def _handle_remove_readonly(
302302

303303

304304
async def _write_output(tree: str, content: str, target: str | None) -> None:
305-
"""Write combined output to ``target`` (``"-"`` ⇒ stdout).
306-
307-
Parameters
308-
----------
309-
tree : str
310-
The tree-like string representation of the file structure.
311-
content : str
312-
The content of the files in the repository or directory.
313-
target : str | None
314-
The path to the output file. If ``None``, the results are not written to a file.
315-
316-
"""
305+
"""Write combined output to ``target`` (``"-"`` ⇒ stdout)."""
317306
data = f"{tree}\n{content}"
318307
loop = asyncio.get_running_loop()
319-
if target == "-":
320-
await loop.run_in_executor(None, sys.stdout.write, data)
321-
await loop.run_in_executor(None, sys.stdout.flush)
322-
elif target is not None:
323-
await loop.run_in_executor(None, Path(target).write_text, data, "utf-8")
308+
try:
309+
if target == "-":
310+
logger.debug("Writing output to stdout.")
311+
await loop.run_in_executor(None, sys.stdout.write, data)
312+
await loop.run_in_executor(None, sys.stdout.flush)
313+
elif target is not None:
314+
logger.debug("Writing output to file: %s", target)
315+
await loop.run_in_executor(None, Path(target).write_text, data, "utf-8")
316+
except Exception as exc:
317+
logger.exception("Failed to write output to %s.", target, exc_info=exc)
318+
raise

src/gitingest/ingestion.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
114114
_process_symlink(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
115115
elif sub_path.is_file():
116116
if sub_path.stat().st_size > query.max_file_size:
117-
logger.info("Skipping file %s: would exceed max file size limit", sub_path)
117+
logger.debug("Skipping file %s: would exceed max file size limit", sub_path)
118118
continue
119119
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
120120
elif sub_path.is_dir():
@@ -194,7 +194,7 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
194194

195195
file_size = path.stat().st_size
196196
if stats.total_size + file_size > MAX_TOTAL_SIZE_BYTES:
197-
logger.info("Skipping file %s: would exceed total size limit", path)
197+
logger.debug("Skipping file %s: would exceed total size limit", path)
198198
return
199199

200200
stats.total_files += 1

src/gitingest/logging_config.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,29 @@
11
"""Global logger configuration."""
22

33
import logging
4+
import os
45
from typing import Literal
56

67
from pythonjsonlogger import jsonlogger
78

89

9-
def setup_json_logging(level: Literal = logging.INFO) -> None:
10-
"""Configure json logger for the whole gitingest module."""
11-
logger = logging.getLogger(__name__)
10+
def setup_logging(level: Literal = logging.INFO) -> None:
11+
"""Configure logger for the whole gitingest module.
12+
13+
Selects formatter based on LOG_FORMAT env variable:
14+
- 'json': JSON formatter (time/level/msg, then extras)
15+
- any other value or unset: default formatter
16+
"""
17+
logger = logging.getLogger()
1218
logger.setLevel(level)
1319
log_handler = logging.StreamHandler()
14-
formatter = jsonlogger.JsonFormatter("%(asctime)s %(levelname)s %(name)s %(message)s")
20+
21+
log_format = os.getenv("LOG_FORMAT", "default").lower()
22+
if log_format == "json":
23+
formatter = jsonlogger.JsonFormatter(
24+
"%(asctime)s %(levelname)s %(message)s %(name)s %(module)s %(funcName)s %(lineno)d",
25+
)
26+
else:
27+
formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
1528
log_handler.setFormatter(formatter)
1629
logger.handlers = [log_handler]

src/gitingest/output_formatter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def _format_token_count(text: str) -> str | None:
193193
encoding = tiktoken.get_encoding("o200k_base") # gpt-4o, gpt-4o-mini
194194
total_tokens = len(encoding.encode(text, disallowed_special=()))
195195
except (ValueError, UnicodeEncodeError):
196-
logger.exception()
196+
logger.exception("Failed to estimate token size.")
197197
return None
198198

199199
for threshold, suffix in _TOKEN_THRESHOLDS:

src/gitingest/schemas/ingestion.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import logging
56
from pathlib import Path # noqa: TC003 (typing-only-standard-library-import) needed for type checking (pydantic)
67
from uuid import UUID # noqa: TC003 (typing-only-standard-library-import) needed for type checking (pydantic)
78

@@ -10,6 +11,8 @@
1011
from gitingest.config import MAX_FILE_SIZE
1112
from gitingest.schemas.cloning import CloneConfig
1213

14+
logger = logging.getLogger(__name__)
15+
1316

1417
class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes
1518
"""Pydantic model to store the parsed details of the repository or file path.
@@ -72,21 +75,18 @@ class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes
7275
s3_url: str | None = None
7376

7477
def extract_clone_config(self) -> CloneConfig:
75-
"""Extract the relevant fields for the CloneConfig object.
76-
77-
Returns
78-
-------
79-
CloneConfig
80-
A CloneConfig object containing the relevant fields.
81-
82-
Raises
83-
------
84-
ValueError
85-
If the ``url`` parameter is not provided.
86-
87-
"""
78+
"""Extract the relevant fields for the CloneConfig object."""
79+
logger.debug(
80+
"Extracting CloneConfig for url=%s, local_path=%s, branch=%s, tag=%s, commit=%s",
81+
self.url,
82+
self.local_path,
83+
self.branch,
84+
self.tag,
85+
self.commit,
86+
)
8887
if not self.url:
8988
msg = "The 'url' parameter is required."
89+
logger.error(msg)
9090
raise ValueError(msg)
9191

9292
return CloneConfig(

src/gitingest/utils/git_utils.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,12 @@ async def ensure_git_installed() -> None:
9999
try:
100100
stdout, _ = await run_command("git", "config", "core.longpaths")
101101
if stdout.decode().strip().lower() != "true":
102-
logger.warning("WARN: Git clone may fail on Windows due to long file paths:")
103-
logger.warning("To avoid this issue, consider enabling long path support with:")
104-
logger.warning(" git config --global core.longpaths true")
105-
logger.warning("Note: This command may require administrator privileges.")
102+
logger.warning(
103+
"""Git clone may fail on Windows due to long file paths:
104+
To avoid this issue, consider enabling long path support with:
105+
git config --global core.longpaths true
106+
Note: This command may require administrator privileges.""",
107+
)
106108
except RuntimeError:
107109
# Ignore if checking 'core.longpaths' fails.
108110
pass

src/gitingest/utils/notebook.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def process_notebook(file: Path, *, include_output: bool = True) -> str:
4141
notebook: dict[str, Any] = json.load(f)
4242
except json.JSONDecodeError as exc:
4343
msg = f"Invalid JSON in notebook: {file}"
44+
logger.exception(msg)
4445
raise InvalidNotebookError(msg) from exc
4546

4647
# Check if the notebook contains worksheets
@@ -125,24 +126,7 @@ def _process_cell(cell: dict[str, Any], *, include_output: bool) -> str | None:
125126

126127

127128
def _extract_output(output: dict[str, Any]) -> list[str]:
128-
"""Extract the output from a Jupyter notebook cell.
129-
130-
Parameters
131-
----------
132-
output : dict[str, Any]
133-
The output dictionary from a Jupyter notebook cell.
134-
135-
Returns
136-
-------
137-
list[str]
138-
The output as a list of strings.
139-
140-
Raises
141-
------
142-
ValueError
143-
If an unknown output type is encountered.
144-
145-
"""
129+
"""Extract the output from a Jupyter notebook cell."""
146130
output_type = output["output_type"]
147131

148132
if output_type == "stream":
@@ -155,4 +139,5 @@ def _extract_output(output: dict[str, Any]) -> list[str]:
155139
return [f"Error: {output['ename']}: {output['evalue']}"]
156140

157141
msg = f"Unknown output type: {output_type}"
142+
logger.error(msg)
158143
raise ValueError(msg)

src/server/main.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from slowapi.errors import RateLimitExceeded
1515
from starlette.middleware.trustedhost import TrustedHostMiddleware
1616

17+
from gitingest.logging_config import setup_logging
1718
from server.metrics_server import start_metrics_server
1819
from server.routers import dynamic, index, ingest
1920
from server.server_config import templates
@@ -22,6 +23,9 @@
2223
# Load environment variables from .env file
2324
load_dotenv()
2425

26+
# Set up logging based on the LOG_FORMAT env variable
27+
setup_logging()
28+
2529
# Initialize Sentry SDK if enabled
2630
if os.getenv("GITINGEST_SENTRY_ENABLED") is not None:
2731
sentry_dsn = os.getenv("GITINGEST_SENTRY_DSN")

0 commit comments

Comments
 (0)