Skip to content

Commit 6ec8656

Browse files
align remaining_rows with prev impl
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent be16634 commit 6ec8656

File tree

1 file changed

+13
-21
lines changed

1 file changed

+13
-21
lines changed

src/databricks/sql/utils.py

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -255,27 +255,19 @@ def next_n_rows(self, num_rows: int) -> "pyarrow.Table":
255255
# Return empty pyarrow table to cause retry of fetch
256256
return self._create_empty_table()
257257
logger.debug("CloudFetchQueue: trying to get {} next rows".format(num_rows))
258-
259258
results = self.table.slice(0, 0)
260-
261259
while num_rows > 0 and self.table:
262260
# Get remaining of num_rows or the rest of the current table, whichever is smaller
263261
length = min(num_rows, self.table.num_rows - self.table_row_index)
264262
table_slice = self.table.slice(self.table_row_index, length)
265-
266-
# Concatenate results if we have any
267-
if results.num_rows > 0:
268-
results = pyarrow.concat_tables([results, table_slice])
269-
else:
270-
results = table_slice
263+
results = pyarrow.concat_tables([results, table_slice])
271264

272265
self.table_row_index += table_slice.num_rows
273266

274267
# Replace current table with the next table if we are at the end of the current table
275268
if self.table_row_index == self.table.num_rows:
276269
self.table = self._create_next_table()
277270
self.table_row_index = 0
278-
279271
num_rows -= table_slice.num_rows
280272

281273
logger.debug("CloudFetchQueue: collected {} next rows".format(results.num_rows))
@@ -288,32 +280,23 @@ def remaining_rows(self) -> "pyarrow.Table":
288280
Returns:
289281
pyarrow.Table
290282
"""
283+
291284
if not self.table:
292285
# Return empty pyarrow table to cause retry of fetch
293286
return self._create_empty_table()
294287

295-
results = pyarrow.Table.from_pydict({}) # Empty table
288+
results = self.table.slice(0, 0)
296289
while self.table:
297290
table_slice = self.table.slice(
298291
self.table_row_index, self.table.num_rows - self.table_row_index
299292
)
300-
if results.num_rows > 0:
301-
results = pyarrow.concat_tables([results, table_slice])
302-
else:
303-
results = table_slice
293+
results = pyarrow.concat_tables([results, table_slice])
304294

305295
self.table_row_index += table_slice.num_rows
306296
self.table = self._create_next_table()
307297
self.table_row_index = 0
308-
309298
return results
310299

311-
def _create_empty_table(self) -> "pyarrow.Table":
312-
"""Create a 0-row table with just the schema bytes."""
313-
if not self.schema_bytes:
314-
return pyarrow.Table.from_pydict({})
315-
return create_arrow_table_from_arrow_file(self.schema_bytes, self.description)
316-
317300
def _create_table_at_offset(self, offset: int) -> Union["pyarrow.Table", None]:
318301
"""Create next table by retrieving the logical next downloaded file."""
319302
# Create next table by retrieving the logical next downloaded file, or return None to signal end of queue
@@ -323,6 +306,9 @@ def _create_table_at_offset(self, offset: int) -> Union["pyarrow.Table", None]:
323306

324307
downloaded_file = self.download_manager.get_next_downloaded_file(offset)
325308
if not downloaded_file:
309+
logger.debug(
310+
"CloudFetchQueue: Cannot find downloaded file for row {}".format(offset)
311+
)
326312
# None signals no more Arrow tables can be built from the remaining handlers if any remain
327313
return None
328314

@@ -345,6 +331,12 @@ def _create_next_table(self) -> Union["pyarrow.Table", None]:
345331
"""Create next table by retrieving the logical next downloaded file."""
346332
pass
347333

334+
def _create_empty_table(self) -> "pyarrow.Table":
335+
"""Create a 0-row table with just the schema bytes."""
336+
if not self.schema_bytes:
337+
return pyarrow.Table.from_pydict({})
338+
return create_arrow_table_from_arrow_file(self.schema_bytes, self.description)
339+
348340

349341
class ThriftCloudFetchQueue(CloudFetchQueue):
350342
"""Queue implementation for EXTERNAL_LINKS disposition with ARROW format for Thrift backend."""

0 commit comments

Comments
 (0)