Skip to content

Commit e8a23bf

Browse files
committed
feat: add centralized JSON logging and integrate into S3 utilities
- Implement `JSONFormatter` and methods for structured logging. - Integrate logging into S3 client creation, uploads, and URL lookups. - Enhance logging with extra fields for better traceability.
1 parent 3835f5f commit e8a23bf

File tree

2 files changed

+215
-17
lines changed

2 files changed

+215
-17
lines changed

src/gitingest/utils/logging_config.py

Lines changed: 111 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,111 @@
1+
"""Centralized logging configuration for JSON logging in k8s environments."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
import logging
7+
import sys
8+
9+
10+
class JSONFormatter(logging.Formatter):
    """Custom JSON formatter for structured logging.

    Renders each log record as a single-line JSON object holding the
    timestamp, level, logger name, message, and source location, plus any
    exception text and caller-supplied ``extra_fields``.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Format ``record`` as a JSON string.

        Parameters
        ----------
        record : logging.LogRecord
            The record to serialize.

        Returns
        -------
        str
            A single-line JSON document.

        """
        log_entry = {
            "timestamp": self.formatTime(record, self.datefmt),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno,
        }

        # Add exception info if present
        if record.exc_info:
            log_entry["exception"] = self.formatException(record.exc_info)

        # Add extra fields if present (attached by log_with_extra)
        if hasattr(record, "extra_fields"):
            log_entry.update(record.extra_fields)

        # default=str keeps logging from crashing when an extra field holds a
        # non-JSON-serializable value (e.g. UUID, datetime, set): the value is
        # stringified instead of json.dumps raising TypeError and dropping the
        # entire log line.
        return json.dumps(log_entry, default=str)
34+
35+
36+
def configure_json_logging(level: str = "INFO") -> None:
    """Configure JSON logging for the application.

    Parameters
    ----------
    level : str
        Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)

    """
    # Map the textual level onto its numeric constant; unknown names fall
    # back to INFO rather than raising.
    numeric_level = getattr(logging, level.upper(), logging.INFO)

    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)

    # Drop any previously-installed handlers so repeated calls do not
    # produce duplicate output.
    while root_logger.handlers:
        root_logger.removeHandler(root_logger.handlers[0])

    # Emit structured JSON to stdout, as expected in k8s environments.
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(numeric_level)
    stdout_handler.setFormatter(JSONFormatter(datefmt="%Y-%m-%dT%H:%M:%S"))
    root_logger.addHandler(stdout_handler)
66+
67+
68+
def get_logger(name: str) -> logging.Logger:
    """Get a logger instance with the given name.

    Parameters
    ----------
    name : str
        Logger name (typically __name__)

    Returns
    -------
    logging.Logger
        Configured logger instance

    """
    # Thin wrapper over the stdlib logger registry; kept so call sites depend
    # on this module rather than importing ``logging`` directly.
    return logging.getLogger(name)
83+
84+
85+
def log_with_extra(logger: logging.Logger, level: str, message: str, **extra_fields: str | int | bool | None) -> None:
    """Log a message with extra fields.

    Parameters
    ----------
    logger : logging.Logger
        Logger instance
    level : str
        Log level (debug, info, warning, error, critical)
    message : str
        Log message
    **extra_fields : str | int | bool | None
        Additional fields to include in the log entry (merged into the JSON
        document by JSONFormatter)

    """
    # Fall back to INFO for unknown level names, mirroring
    # configure_json_logging, instead of raising AttributeError.
    numeric_level = getattr(logging, level.upper(), logging.INFO)

    # Logger.handle() bypasses the logger's own level check (that check
    # normally happens inside .debug()/.info()/...), so gate explicitly to
    # keep the logger's configured level effective.
    if not logger.isEnabledFor(numeric_level):
        return

    # Build a record manually so arbitrary extra fields can be attached.
    record = logger.makeRecord(
        logger.name,
        numeric_level,
        "",
        0,
        message,
        (),
        None,
    )
    record.extra_fields = extra_fields
    logger.handle(record)

src/gitingest/utils/s3_utils.py

Lines changed: 104 additions & 17 deletions
Original file line number · Diff line number · Diff line change
@@ -10,6 +10,11 @@
1010
from boto3 import client as boto_client
1111
from botocore.exceptions import ClientError
1212

13+
from gitingest.utils.logging_config import get_logger, log_with_extra
14+
15+
# Initialize logger for this module
16+
logger = get_logger(__name__)
17+
1318

1419
class S3UploadError(Exception):
1520
"""Custom exception for S3 upload failures."""
@@ -131,6 +136,17 @@ def generate_s3_file_path(
131136
def create_s3_client() -> boto_client:  # type: ignore[name-defined]
    """Create and return an S3 client with configuration from environment.

    Reads configuration via ``get_s3_config()`` and passes it straight to
    ``boto3.client("s3", ...)``. Emits a debug log describing the
    configuration with credential values redacted.
    """
    # NOTE(review): get_s3_config() presumably returns boto3 client kwargs
    # (endpoint_url, region_name, credentials) — confirm against its definition.
    config = get_s3_config()

    # Log S3 client creation with configuration details (excluding sensitive info)
    log_config = {k: v for k, v in config.items() if k not in ["aws_access_key_id", "aws_secret_access_key"]}
    log_with_extra(
        logger,
        "debug",
        "Creating S3 client",
        s3_config=log_config,
        # Only records *whether* credentials are present, never their values.
        has_credentials=bool(config.get("aws_access_key_id")),
    )
    return boto_client("s3", **config)
135151

136152

@@ -166,10 +182,21 @@ def upload_to_s3(content: str, s3_file_path: str, ingest_id: UUID) -> str:
166182
msg = "S3 is not enabled"
167183
raise ValueError(msg)
168184

169-
try:
170-
s3_client = create_s3_client()
171-
bucket_name = get_s3_bucket_name()
185+
s3_client = create_s3_client()
186+
bucket_name = get_s3_bucket_name()
187+
188+
# Log upload attempt
189+
log_with_extra(
190+
logger,
191+
"debug",
192+
"Starting S3 upload",
193+
bucket_name=bucket_name,
194+
s3_file_path=s3_file_path,
195+
ingest_id=str(ingest_id),
196+
content_size=len(content),
197+
)
172198

199+
try:
173200
# Upload the content with ingest_id as tag
174201
s3_client.put_object(
175202
Bucket=bucket_name,
@@ -178,21 +205,46 @@ def upload_to_s3(content: str, s3_file_path: str, ingest_id: UUID) -> str:
178205
ContentType="text/plain",
179206
Tagging=f"ingest_id={ingest_id!s}",
180207
)
208+
except ClientError as e:
209+
# Log upload failure
210+
log_with_extra(
211+
logger,
212+
"error",
213+
"S3 upload failed",
214+
bucket_name=bucket_name,
215+
s3_file_path=s3_file_path,
216+
ingest_id=str(ingest_id),
217+
error_code=e.response.get("Error", {}).get("Code"),
218+
error_message=str(e),
219+
)
220+
msg = f"Failed to upload to S3: {e}"
221+
raise S3UploadError(msg) from e
181222

182-
# Generate public URL
183-
alias_host = get_s3_alias_host()
184-
if alias_host:
185-
# Use alias host if configured
186-
return f"{alias_host.rstrip('/')}/{s3_file_path}"
223+
# Generate public URL
224+
alias_host = get_s3_alias_host()
225+
if alias_host:
226+
# Use alias host if configured
227+
public_url = f"{alias_host.rstrip('/')}/{s3_file_path}"
228+
else:
187229
# Fallback to direct S3 URL
188-
endpoint = get_s3_config()["endpoint_url"]
230+
endpoint = get_s3_config().get("endpoint_url")
189231
if endpoint:
190-
return f"{endpoint.rstrip('/')}/{bucket_name}/{s3_file_path}"
191-
return f"https://{bucket_name}.s3.{get_s3_config()['region_name']}.amazonaws.com/{s3_file_path}"
232+
public_url = f"{endpoint.rstrip('/')}/{bucket_name}/{s3_file_path}"
233+
else:
234+
public_url = f"https://{bucket_name}.s3.{get_s3_config()['region_name']}.amazonaws.com/{s3_file_path}"
192235

193-
except ClientError as e:
194-
msg = f"Failed to upload to S3: {e}"
195-
raise S3UploadError(msg) from e
236+
# Log successful upload
237+
log_with_extra(
238+
logger,
239+
"debug",
240+
"S3 upload completed successfully",
241+
bucket_name=bucket_name,
242+
s3_file_path=s3_file_path,
243+
ingest_id=str(ingest_id),
244+
public_url=public_url,
245+
)
246+
247+
return public_url
196248

197249

198250
def _build_s3_url(key: str) -> str:
@@ -241,8 +293,16 @@ def get_s3_url_for_ingest_id(ingest_id: UUID) -> str | None:
241293
242294
"""
243295
if not is_s3_enabled():
296+
logger.debug("S3 not enabled, skipping URL lookup for ingest_id: %s", ingest_id)
244297
return None
245298

299+
log_with_extra(
300+
logger,
301+
"debug",
302+
"Starting S3 URL lookup for ingest ID",
303+
ingest_id=str(ingest_id),
304+
)
305+
246306
try:
247307
s3_client = create_s3_client()
248308
bucket_name = get_s3_bucket_name()
@@ -254,16 +314,43 @@ def get_s3_url_for_ingest_id(ingest_id: UUID) -> str | None:
254314
Prefix="ingest/",
255315
)
256316

317+
objects_checked = 0
257318
for page in page_iterator:
258319
if "Contents" not in page:
259320
continue
260321

261322
for obj in page["Contents"]:
262323
key = obj["Key"]
324+
objects_checked += 1
263325
if _check_object_tags(s3_client, bucket_name, key, ingest_id):
264-
return _build_s3_url(key)
326+
s3_url = _build_s3_url(key)
327+
log_with_extra(
328+
logger,
329+
"debug",
330+
"Found S3 object for ingest ID",
331+
ingest_id=str(ingest_id),
332+
s3_key=key,
333+
s3_url=s3_url,
334+
objects_checked=objects_checked,
335+
)
336+
return s3_url
337+
338+
log_with_extra(
339+
logger,
340+
"debug",
341+
"No S3 object found for ingest ID",
342+
ingest_id=str(ingest_id),
343+
objects_checked=objects_checked,
344+
)
265345

266-
except ClientError:
267-
pass
346+
except ClientError as e:
347+
log_with_extra(
348+
logger,
349+
"error",
350+
"Error during S3 URL lookup",
351+
ingest_id=str(ingest_id),
352+
error_code=e.response.get("Error", {}).get("Code"),
353+
error_message=str(e),
354+
)
268355

269356
return None

0 commit comments

Comments
 (0)