Skip to content

Commit 13d13de

Browse files
committed
added logs for cloud fetch speed
1 parent 71d306f commit 13d13de

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

src/databricks/sql/cloudfetch/downloader.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,14 @@ class DownloadableResultSettings:
5151
link_expiry_buffer_secs (int): Time in seconds to prevent download of a link before it expires. Default 0 secs.
5252
download_timeout (int): Timeout for download requests. Default 60 secs.
5353
max_consecutive_file_download_retries (int): Number of consecutive download retries before shutting down.
54+
speed_warning_threshold_mbps (float): Threshold in MB/s below which to log warning. Default 0.1 MB/s.
5455
"""
5556

5657
is_lz4_compressed: bool
5758
link_expiry_buffer_secs: int = 0
5859
download_timeout: int = 60
5960
max_consecutive_file_download_retries: int = 0
61+
speed_warning_threshold_mbps: float = 0.1
6062

6163

6264
class ResultSetDownloadHandler:
@@ -90,6 +92,8 @@ def run(self) -> DownloadedFile:
9092
self.link, self.settings.link_expiry_buffer_secs
9193
)
9294

95+
start_time = time.time()
96+
9397
with self._http_client.execute(
9498
method=HttpMethod.GET,
9599
url=self.link.fileLink,
@@ -102,6 +106,13 @@ def run(self) -> DownloadedFile:
102106

103107
# Save (and decompress if needed) the downloaded file
104108
compressed_data = response.content
109+
110+
# Log download metrics
111+
download_duration = time.time() - start_time
112+
self._log_download_metrics(
113+
self.link.fileLink, len(compressed_data), download_duration
114+
)
115+
105116
decompressed_data = (
106117
ResultSetDownloadHandler._decompress_data(compressed_data)
107118
if self.settings.is_lz4_compressed
@@ -128,6 +139,30 @@ def run(self) -> DownloadedFile:
128139
self.link.rowCount,
129140
)
130141

142+
def _log_download_metrics(
143+
self, url: str, bytes_downloaded: int, duration_seconds: float
144+
):
145+
"""Log download speed metrics at INFO/WARN levels."""
146+
if duration_seconds <= 0:
147+
return
148+
149+
# Calculate speed in MB/s (ensure float division for precision)
150+
speed_mbps = (float(bytes_downloaded) / (1024 * 1024)) / duration_seconds
151+
152+
urlEndpoint = url.split("?")[0]
153+
# INFO level logging
154+
logger.info(
155+
f"CloudFetch download completed: {speed_mbps:.4f} MB/s, "
156+
f"{bytes_downloaded} bytes in {duration_seconds:.3f}s from {urlEndpoint}"
157+
)
158+
159+
# WARN level logging if below threshold
160+
if speed_mbps < self.settings.speed_warning_threshold_mbps:
161+
logger.warning(
162+
f"CloudFetch download slower than threshold: {speed_mbps:.4f} MB/s "
163+
f"(threshold: {self.settings.speed_warning_threshold_mbps:.1f} MB/s) from {url}"
164+
)
165+
131166
@staticmethod
132167
def _validate_link(link: TSparkArrowResultLink, expiry_buffer_secs: int):
133168
"""

0 commit comments

Comments
 (0)