Commit e939d9e

Merge branch 'ext-links-sea' into sea-norm-cols
2 parents: 8f12c33 + abef941

10 files changed, +196 -184 lines changed

src/databricks/sql/backend/databricks_client.py

Lines changed: 2 additions & 0 deletions
@@ -94,6 +94,7 @@ def execute_command(
         parameters: List,
         async_op: bool,
         enforce_embedded_schema_correctness: bool,
+        row_limit: Optional[int] = None,
     ) -> Union["ResultSet", None]:
         """
         Executes a SQL command or query within the specified session.
@@ -112,6 +113,7 @@ def execute_command(
             parameters: List of parameters to bind to the query
             async_op: Whether to execute the command asynchronously
             enforce_embedded_schema_correctness: Whether to enforce schema correctness
+            row_limit: Maximum number of rows in the operation result.

         Returns:
             If async_op is False, returns a ResultSet object containing the
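For implementers, a minimal sketch of what the widened interface asks of a concrete backend: accept the new keyword and treat None as "no limit". The subclass name is made up, and all pre-existing parameters of execute_command are elided behind *args/**kwargs here; this is an illustration, not the full signature.

from typing import Optional

from databricks.sql.backend.databricks_client import DatabricksClient


class IllustrativeBackend(DatabricksClient):  # hypothetical subclass, for illustration only
    def execute_command(self, *args, row_limit: Optional[int] = None, **kwargs):
        # Forward row_limit to the server-side request; None keeps the old
        # uncapped behavior, so existing callers are unaffected.
        ...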

src/databricks/sql/backend/sea/backend.py

Lines changed: 2 additions & 1 deletion
@@ -410,6 +410,7 @@ def execute_command(
         parameters: List[Dict[str, Any]],
         async_op: bool,
         enforce_embedded_schema_correctness: bool,
+        row_limit: Optional[int] = None,
     ) -> Union[SeaResultSet, None]:
         """
         Execute a SQL command using the SEA backend.
@@ -467,7 +468,7 @@ def execute_command(
             format=format,
             wait_timeout=(WaitTimeout.ASYNC if async_op else WaitTimeout.SYNC).value,
             on_wait_timeout="CONTINUE",
-            row_limit=max_rows,
+            row_limit=row_limit,
             parameters=sea_parameters if sea_parameters else None,
             result_compression=result_compression,
         )
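The one-line change in the second hunk is the substantive fix: the SEA statement request's row_limit caps the whole result, while max_rows is the driver's fetch batch size, so reusing max_rows here could silently truncate results larger than a fetch batch. A rough, self-contained sketch of the intended semantics (the function and values below are illustrative, not the driver's API):

def effective_rows(total_rows, row_limit, max_rows):
    # row_limit caps the statement result as a whole; None means uncapped.
    capped = total_rows if row_limit is None else min(total_rows, row_limit)
    # max_rows only shapes how many rows each fetch round-trip returns;
    # it should never change how many rows are ultimately available.
    return capped

assert effective_rows(1_000_000, None, max_rows=10_000) == 1_000_000
assert effective_rows(1_000_000, 100, max_rows=10_000) == 100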

src/databricks/sql/backend/sea/queue.py

Lines changed: 5 additions & 18 deletions
@@ -35,11 +35,11 @@ def build_queue(
         result_data: ResultData,
         manifest: ResultManifest,
         statement_id: str,
-        ssl_options: Optional[SSLOptions] = None,
-        description: List[Tuple] = [],
-        max_download_threads: Optional[int] = None,
-        sea_client: Optional[SeaDatabricksClient] = None,
-        lz4_compressed: bool = False,
+        ssl_options: SSLOptions,
+        description: List[Tuple],
+        max_download_threads: int,
+        sea_client: SeaDatabricksClient,
+        lz4_compressed: bool,
     ) -> ResultSetQueue:
         """
         Factory method to build a result set queue for SEA backend.
@@ -62,19 +62,6 @@ def build_queue(
             return JsonQueue(result_data.data)
         elif manifest.format == ResultFormat.ARROW_STREAM.value:
             # EXTERNAL_LINKS disposition
-            if not max_download_threads:
-                raise ValueError(
-                    "Max download threads is required for EXTERNAL_LINKS disposition"
-                )
-            if not ssl_options:
-                raise ValueError(
-                    "SSL options are required for EXTERNAL_LINKS disposition"
-                )
-            if not sea_client:
-                raise ValueError(
-                    "SEA client is required for EXTERNAL_LINKS disposition"
-                )
-
             return SeaCloudFetchQueue(
                 initial_links=result_data.external_links or [],
                 max_download_threads=max_download_threads,
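With the collaborators declared as required parameters, the hand-written checks above become redundant: a missing argument now fails immediately at the call site (a TypeError at runtime, and an error from a type checker such as mypy) rather than via a later ValueError. A toy sketch of the pattern, using made-up names rather than the driver's API:

def build_old(sea_client=None):
    if not sea_client:
        raise ValueError("SEA client is required for EXTERNAL_LINKS disposition")
    return sea_client

def build_new(sea_client):
    return sea_client

try:
    build_new()  # intentionally wrong call
except TypeError as err:
    print(err)  # build_new() missing 1 required positional argument: 'sea_client'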

src/databricks/sql/backend/thrift_backend.py

Lines changed: 3 additions & 1 deletion
@@ -4,7 +4,7 @@
 import math
 import time
 import threading
-from typing import List, Union, Any, TYPE_CHECKING
+from typing import List, Optional, Union, Any, TYPE_CHECKING

 if TYPE_CHECKING:
     from databricks.sql.client import Cursor
@@ -925,6 +925,7 @@ def execute_command(
         parameters=[],
         async_op=False,
         enforce_embedded_schema_correctness=False,
+        row_limit: Optional[int] = None,
     ) -> Union["ResultSet", None]:
         thrift_handle = session_id.to_thrift_handle()
         if not thrift_handle:
@@ -965,6 +966,7 @@ def execute_command(
             useArrowNativeTypes=spark_arrow_types,
             parameters=parameters,
             enforceEmbeddedSchemaCorrectness=enforce_embedded_schema_correctness,
+            resultRowLimit=row_limit,
         )
         resp = self.make_request(self._client.ExecuteStatement, req)

src/databricks/sql/client.py

Lines changed: 20 additions & 8 deletions
@@ -335,8 +335,14 @@ def cursor(
         self,
         arraysize: int = DEFAULT_ARRAY_SIZE,
         buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
+        row_limit: Optional[int] = None,
     ) -> "Cursor":
         """
+        Args:
+            arraysize: The maximum number of rows in direct results.
+            buffer_size_bytes: The maximum number of bytes in direct results.
+            row_limit: The maximum number of rows in the result.
+
         Return a new Cursor object using the connection.

         Will throw an Error if the connection has been closed.
@@ -349,6 +355,7 @@ def cursor(
             self.session.backend,
             arraysize=arraysize,
             result_buffer_size_bytes=buffer_size_bytes,
+            row_limit=row_limit,
         )
         self._cursors.append(cursor)
         return cursor
@@ -382,6 +389,7 @@ def __init__(
         backend: DatabricksClient,
         result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
         arraysize: int = DEFAULT_ARRAY_SIZE,
+        row_limit: Optional[int] = None,
     ) -> None:
         """
         These objects represent a database cursor, which is used to manage the context of a fetch
@@ -391,16 +399,18 @@ def __init__(
         visible by other cursors or connections.
         """

-        self.connection = connection
-        self.rowcount = -1  # Return -1 as this is not supported
-        self.buffer_size_bytes = result_buffer_size_bytes
+        self.connection: Connection = connection
+
+        self.rowcount: int = -1  # Return -1 as this is not supported
+        self.buffer_size_bytes: int = result_buffer_size_bytes
         self.active_result_set: Union[ResultSet, None] = None
-        self.arraysize = arraysize
+        self.arraysize: int = arraysize
+        self.row_limit: Optional[int] = row_limit
         # Note that Cursor closed => active result set closed, but not vice versa
-        self.open = True
-        self.executing_command_id = None
-        self.backend = backend
-        self.active_command_id = None
+        self.open: bool = True
+        self.executing_command_id: Optional[CommandId] = None
+        self.backend: DatabricksClient = backend
+        self.active_command_id: Optional[CommandId] = None
         self.escaper = ParamEscaper()
         self.lastrowid = None

@@ -779,6 +789,7 @@ def execute(
             parameters=prepared_params,
             async_op=False,
             enforce_embedded_schema_correctness=enforce_embedded_schema_correctness,
+            row_limit=self.row_limit,
         )

         if self.active_result_set and self.active_result_set.is_staging_operation:
@@ -835,6 +846,7 @@ def execute_async(
             parameters=prepared_params,
             async_op=True,
             enforce_embedded_schema_correctness=enforce_embedded_schema_correctness,
+            row_limit=self.row_limit,
         )

         return self
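End to end, the new keyword becomes part of the public cursor API and applies to both execute and execute_async. A hedged usage sketch; the connection arguments and table name are placeholders, not part of this change:

from databricks import sql

with sql.connect(
    server_hostname="...", http_path="...", access_token="..."
) as connection:
    # Every statement run on this cursor is capped at 100 rows;
    # omitting row_limit (or passing None) keeps the previous uncapped behavior.
    with connection.cursor(row_limit=100) as cursor:
        cursor.execute("SELECT * FROM samples.nyctaxi.trips")
        rows = cursor.fetchall()
        assert len(rows) <= 100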

src/databricks/sql/utils.py

Lines changed: 46 additions & 79 deletions
@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING
+from typing import Dict, List, Optional, Union

 from dateutil import parser
 import datetime
@@ -9,34 +9,25 @@
 from collections.abc import Mapping
 from decimal import Decimal
 from enum import Enum
-from typing import Any, Dict, List, Optional, Tuple, Union, Sequence
+from typing import Dict, List, Optional, Tuple, Union, Sequence
 import re

-import dateutil
 import lz4.frame

-from databricks.sql.backend.sea.backend import SeaDatabricksClient
-from databricks.sql.backend.sea.models.base import ResultData, ResultManifest
-
 try:
     import pyarrow
 except ImportError:
     pyarrow = None

 from databricks.sql import OperationalError
-from databricks.sql.exc import ProgrammingError
 from databricks.sql.cloudfetch.download_manager import ResultFileDownloadManager
 from databricks.sql.thrift_api.TCLIService.ttypes import (
     TRowSet,
     TSparkArrowResultLink,
     TSparkRowSetType,
 )
 from databricks.sql.types import SSLOptions
-from databricks.sql.backend.sea.models.base import (
-    ResultData,
-    ExternalLink,
-    ResultManifest,
-)
+
 from databricks.sql.parameters.native import ParameterStructure, TDbsqlParameter

 import logging
@@ -227,11 +218,12 @@ def __init__(
             lz4_compressed: Whether the data is LZ4 compressed
             description: Column descriptions
         """
+
+        self.schema_bytes = schema_bytes
+        self.max_download_threads = max_download_threads
         self.lz4_compressed = lz4_compressed
         self.description = description
-        self.schema_bytes = schema_bytes
         self._ssl_options = ssl_options
-        self.max_download_threads = max_download_threads

         # Table state
         self.table = None
@@ -240,104 +232,73 @@ def __init__(
         # Initialize download manager
         self.download_manager: Optional["ResultFileDownloadManager"] = None

-    def remaining_rows(self) -> "pyarrow.Table":
+    def next_n_rows(self, num_rows: int) -> "pyarrow.Table":
         """
-        Get all remaining rows of the cloud fetch Arrow dataframes.
+        Get up to the next n rows of the cloud fetch Arrow dataframes.

+        Args:
+            num_rows (int): Number of rows to retrieve.
         Returns:
             pyarrow.Table
         """
         if not self.table:
+            logger.debug("CloudFetchQueue: no more rows available")
             # Return empty pyarrow table to cause retry of fetch
             return self._create_empty_table()
-
-        results = pyarrow.Table.from_pydict({})  # Empty table
-        while self.table:
-            table_slice = self.table.slice(
-                self.table_row_index, self.table.num_rows - self.table_row_index
-            )
-            if results.num_rows > 0:
-                results = pyarrow.concat_tables([results, table_slice])
-            else:
-                results = table_slice
-
-            self.table_row_index += table_slice.num_rows
-            self.table = self._create_next_table()
-            self.table_row_index = 0
-
-        return results
-
-    def next_n_rows(self, num_rows: int) -> "pyarrow.Table":
-        """Get up to the next n rows of the cloud fetch Arrow dataframes."""
-        if not self.table:
-            # Return empty pyarrow table to cause retry of fetch
-            return self._create_empty_table()
-
-        logger.info("SeaCloudFetchQueue: Retrieving up to {} rows".format(num_rows))
-        results = pyarrow.Table.from_pydict({})  # Empty table
-        rows_fetched = 0
-
+        logger.debug("CloudFetchQueue: trying to get {} next rows".format(num_rows))
+        results = self.table.slice(0, 0)
         while num_rows > 0 and self.table:
             # Get remaining of num_rows or the rest of the current table, whichever is smaller
             length = min(num_rows, self.table.num_rows - self.table_row_index)
-            logger.info(
-                "CloudFetchQueue: Slicing table from index {} for {} rows (table has {} rows total)".format(
-                    self.table_row_index, length, self.table.num_rows
-                )
-            )
             table_slice = self.table.slice(self.table_row_index, length)
-
-            # Concatenate results if we have any
-            if results.num_rows > 0:
-                logger.info(
-                    "CloudFetchQueue: Concatenating {} rows to existing {} rows".format(
-                        table_slice.num_rows, results.num_rows
-                    )
-                )
-                results = pyarrow.concat_tables([results, table_slice])
-            else:
-                results = table_slice
-
+            results = pyarrow.concat_tables([results, table_slice])
             self.table_row_index += table_slice.num_rows
-            rows_fetched += table_slice.num_rows
-
-            logger.info(
-                "CloudFetchQueue: After slice, table_row_index={}, rows_fetched={}".format(
-                    self.table_row_index, rows_fetched
-                )
-            )

             # Replace current table with the next table if we are at the end of the current table
             if self.table_row_index == self.table.num_rows:
-                logger.info(
-                    "CloudFetchQueue: Reached end of current table, fetching next"
-                )
                 self.table = self._create_next_table()
                 self.table_row_index = 0
-
             num_rows -= table_slice.num_rows

-        logger.info("CloudFetchQueue: Retrieved {} rows".format(results.num_rows))
+        logger.debug("CloudFetchQueue: collected {} next rows".format(results.num_rows))
         return results

-    def _create_empty_table(self) -> "pyarrow.Table":
-        """Create a 0-row table with just the schema bytes."""
-        if not self.schema_bytes:
-            return pyarrow.Table.from_pydict({})
-        return create_arrow_table_from_arrow_file(self.schema_bytes, self.description)
+    def remaining_rows(self) -> "pyarrow.Table":
+        """
+        Get all remaining rows of the cloud fetch Arrow dataframes.
+
+        Returns:
+            pyarrow.Table
+        """
+
+        if not self.table:
+            # Return empty pyarrow table to cause retry of fetch
+            return self._create_empty_table()
+        results = self.table.slice(0, 0)
+        while self.table:
+            table_slice = self.table.slice(
+                self.table_row_index, self.table.num_rows - self.table_row_index
+            )
+            results = pyarrow.concat_tables([results, table_slice])
+            self.table_row_index += table_slice.num_rows
+            self.table = self._create_next_table()
+            self.table_row_index = 0
+        return results

     def _create_table_at_offset(self, offset: int) -> Union["pyarrow.Table", None]:
-        """Create next table by retrieving the logical next downloaded file."""
+        """Create next table at the given row offset"""
        # Create next table by retrieving the logical next downloaded file, or return None to signal end of queue
         if not self.download_manager:
             logger.debug("CloudFetchQueue: No download manager available")
             return None

         downloaded_file = self.download_manager.get_next_downloaded_file(offset)
         if not downloaded_file:
+            logger.debug(
+                "CloudFetchQueue: Cannot find downloaded file for row {}".format(offset)
+            )
             # None signals no more Arrow tables can be built from the remaining handlers if any remain
             return None
-
         arrow_table = create_arrow_table_from_arrow_file(
             downloaded_file.file_bytes, self.description
         )
@@ -357,6 +318,12 @@ def _create_next_table(self) -> Union["pyarrow.Table", None]:
         """Create next table by retrieving the logical next downloaded file."""
         pass

+    def _create_empty_table(self) -> "pyarrow.Table":
+        """Create a 0-row table with just the schema bytes."""
+        if not self.schema_bytes:
+            return pyarrow.Table.from_pydict({})
+        return create_arrow_table_from_arrow_file(self.schema_bytes, self.description)
+

 class ThriftCloudFetchQueue(CloudFetchQueue):
     """Queue implementation for EXTERNAL_LINKS disposition with ARROW format for Thrift backend."""
