10
10
from boto3 import client as boto_client
11
11
from botocore .exceptions import ClientError
12
12
13
+ from gitingest .utils .logging_config import get_logger , log_with_extra
14
+
15
+ # Initialize logger for this module
16
+ logger = get_logger (__name__ )
17
+
13
18
14
19
class S3UploadError(Exception):
    """Custom exception for S3 upload failures.

    Raised when a ``put_object`` call to S3 fails; the underlying
    ``botocore.exceptions.ClientError`` is attached as the cause
    (``raise ... from e``) so callers can inspect the original error.
    """
@@ -131,6 +136,17 @@ def generate_s3_file_path(
131
136
def create_s3_client() -> boto_client:  # type: ignore[name-defined]
    """Create and return an S3 client with configuration from environment.

    Reads connection settings via ``get_s3_config()`` and emits a debug log
    record describing the client being created. Credential values are
    stripped from the logged configuration; only a boolean flag indicating
    whether credentials are present is recorded.

    Returns
    -------
    boto_client
        A boto3 S3 client constructed from the environment-derived config.
    """
    config = get_s3_config()

    # Log S3 client creation with configuration details, excluding
    # sensitive credential values. Use a set for O(1) membership tests.
    sensitive_keys = {"aws_access_key_id", "aws_secret_access_key"}
    log_config = {k: v for k, v in config.items() if k not in sensitive_keys}
    log_with_extra(
        logger,
        "debug",
        "Creating S3 client",
        s3_config=log_config,
        has_credentials=bool(config.get("aws_access_key_id")),
    )

    return boto_client("s3", **config)
135
151
136
152
@@ -166,10 +182,21 @@ def upload_to_s3(content: str, s3_file_path: str, ingest_id: UUID) -> str:
166
182
msg = "S3 is not enabled"
167
183
raise ValueError (msg )
168
184
169
- try :
170
- s3_client = create_s3_client ()
171
- bucket_name = get_s3_bucket_name ()
185
+ s3_client = create_s3_client ()
186
+ bucket_name = get_s3_bucket_name ()
187
+
188
+ # Log upload attempt
189
+ log_with_extra (
190
+ logger ,
191
+ "debug" ,
192
+ "Starting S3 upload" ,
193
+ bucket_name = bucket_name ,
194
+ s3_file_path = s3_file_path ,
195
+ ingest_id = str (ingest_id ),
196
+ content_size = len (content ),
197
+ )
172
198
199
+ try :
173
200
# Upload the content with ingest_id as tag
174
201
s3_client .put_object (
175
202
Bucket = bucket_name ,
@@ -178,21 +205,46 @@ def upload_to_s3(content: str, s3_file_path: str, ingest_id: UUID) -> str:
178
205
ContentType = "text/plain" ,
179
206
Tagging = f"ingest_id={ ingest_id !s} " ,
180
207
)
208
+ except ClientError as e :
209
+ # Log upload failure
210
+ log_with_extra (
211
+ logger ,
212
+ "error" ,
213
+ "S3 upload failed" ,
214
+ bucket_name = bucket_name ,
215
+ s3_file_path = s3_file_path ,
216
+ ingest_id = str (ingest_id ),
217
+ error_code = e .response .get ("Error" , {}).get ("Code" ),
218
+ error_message = str (e ),
219
+ )
220
+ msg = f"Failed to upload to S3: { e } "
221
+ raise S3UploadError (msg ) from e
181
222
182
- # Generate public URL
183
- alias_host = get_s3_alias_host ()
184
- if alias_host :
185
- # Use alias host if configured
186
- return f"{ alias_host .rstrip ('/' )} /{ s3_file_path } "
223
+ # Generate public URL
224
+ alias_host = get_s3_alias_host ()
225
+ if alias_host :
226
+ # Use alias host if configured
227
+ public_url = f"{ alias_host .rstrip ('/' )} /{ s3_file_path } "
228
+ else :
187
229
# Fallback to direct S3 URL
188
- endpoint = get_s3_config ()[ "endpoint_url" ]
230
+ endpoint = get_s3_config (). get ( "endpoint_url" )
189
231
if endpoint :
190
- return f"{ endpoint .rstrip ('/' )} /{ bucket_name } /{ s3_file_path } "
191
- return f"https://{ bucket_name } .s3.{ get_s3_config ()['region_name' ]} .amazonaws.com/{ s3_file_path } "
232
+ public_url = f"{ endpoint .rstrip ('/' )} /{ bucket_name } /{ s3_file_path } "
233
+ else :
234
+ public_url = f"https://{ bucket_name } .s3.{ get_s3_config ()['region_name' ]} .amazonaws.com/{ s3_file_path } "
192
235
193
- except ClientError as e :
194
- msg = f"Failed to upload to S3: { e } "
195
- raise S3UploadError (msg ) from e
236
+ # Log successful upload
237
+ log_with_extra (
238
+ logger ,
239
+ "debug" ,
240
+ "S3 upload completed successfully" ,
241
+ bucket_name = bucket_name ,
242
+ s3_file_path = s3_file_path ,
243
+ ingest_id = str (ingest_id ),
244
+ public_url = public_url ,
245
+ )
246
+
247
+ return public_url
196
248
197
249
198
250
def _build_s3_url (key : str ) -> str :
@@ -241,8 +293,16 @@ def get_s3_url_for_ingest_id(ingest_id: UUID) -> str | None:
241
293
242
294
"""
243
295
if not is_s3_enabled ():
296
+ logger .debug ("S3 not enabled, skipping URL lookup for ingest_id: %s" , ingest_id )
244
297
return None
245
298
299
+ log_with_extra (
300
+ logger ,
301
+ "debug" ,
302
+ "Starting S3 URL lookup for ingest ID" ,
303
+ ingest_id = str (ingest_id ),
304
+ )
305
+
246
306
try :
247
307
s3_client = create_s3_client ()
248
308
bucket_name = get_s3_bucket_name ()
@@ -254,16 +314,43 @@ def get_s3_url_for_ingest_id(ingest_id: UUID) -> str | None:
254
314
Prefix = "ingest/" ,
255
315
)
256
316
317
+ objects_checked = 0
257
318
for page in page_iterator :
258
319
if "Contents" not in page :
259
320
continue
260
321
261
322
for obj in page ["Contents" ]:
262
323
key = obj ["Key" ]
324
+ objects_checked += 1
263
325
if _check_object_tags (s3_client , bucket_name , key , ingest_id ):
264
- return _build_s3_url (key )
326
+ s3_url = _build_s3_url (key )
327
+ log_with_extra (
328
+ logger ,
329
+ "debug" ,
330
+ "Found S3 object for ingest ID" ,
331
+ ingest_id = str (ingest_id ),
332
+ s3_key = key ,
333
+ s3_url = s3_url ,
334
+ objects_checked = objects_checked ,
335
+ )
336
+ return s3_url
337
+
338
+ log_with_extra (
339
+ logger ,
340
+ "debug" ,
341
+ "No S3 object found for ingest ID" ,
342
+ ingest_id = str (ingest_id ),
343
+ objects_checked = objects_checked ,
344
+ )
265
345
266
- except ClientError :
267
- pass
346
+ except ClientError as e :
347
+ log_with_extra (
348
+ logger ,
349
+ "error" ,
350
+ "Error during S3 URL lookup" ,
351
+ ingest_id = str (ingest_id ),
352
+ error_code = e .response .get ("Error" , {}).get ("Code" ),
353
+ error_message = str (e ),
354
+ )
268
355
269
356
return None
0 commit comments