
Commit d7ab57f

init cloud fetch stuffs
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent 71266b1 commit d7ab57f

8 files changed: +797 −51 lines changed

examples/experimental/sea_connector_test.py

Lines changed: 119 additions & 2 deletions
@@ -273,9 +273,126 @@ def test_sea_session():
     logger.info("SEA session test completed successfully")
 
 
+def test_sea_result_set_arrow_external_links():
+    """
+    Test the SEA result set implementation with ARROW format and EXTERNAL_LINKS disposition.
+
+    This function connects to a Databricks SQL endpoint using the SEA backend,
+    executes a query that returns a large result set (which will use EXTERNAL_LINKS disposition),
+    and tests the various fetch methods to verify the result set implementation works correctly.
+    """
+    server_hostname = os.environ.get("DATABRICKS_SERVER_HOSTNAME")
+    http_path = os.environ.get("DATABRICKS_HTTP_PATH")
+    access_token = os.environ.get("DATABRICKS_TOKEN")
+    catalog = os.environ.get("DATABRICKS_CATALOG", "samples")
+    schema = os.environ.get("DATABRICKS_SCHEMA", "tpch")
+
+    if not all([server_hostname, http_path, access_token]):
+        logger.error("Missing required environment variables.")
+        logger.error(
+            "Please set DATABRICKS_SERVER_HOSTNAME, DATABRICKS_HTTP_PATH, and DATABRICKS_TOKEN."
+        )
+        sys.exit(1)
+
+    try:
+        # Create connection with SEA backend
+        logger.info("Creating connection with SEA backend...")
+        connection = Connection(
+            server_hostname=server_hostname,
+            http_path=http_path,
+            access_token=access_token,
+            catalog=catalog,
+            schema=schema,
+            use_sea=True,
+            use_cloud_fetch=True,  # Enable cloud fetch to trigger EXTERNAL_LINKS + ARROW
+            user_agent_entry="SEA-Test-Client",
+            # Use a smaller arraysize to potentially force multiple chunks
+            arraysize=1000,
+        )
+
+        logger.info(
+            f"Successfully opened SEA session with ID: {connection.get_session_id_hex()}"
+        )
+
+        # Create cursor
+        cursor = connection.cursor()
+
+        # Execute a query that returns a large result set (will use EXTERNAL_LINKS disposition),
+        # using a CROSS JOIN to generate enough rows to span multiple chunks
+        logger.info("Executing query: SELECT a.id as id1, b.id as id2 FROM range(1, 1000) a CROSS JOIN range(1, 1000) b LIMIT 100000")
+        cursor.execute("SELECT a.id as id1, b.id as id2 FROM range(1, 1000) a CROSS JOIN range(1, 1000) b LIMIT 100000")
+
+        # The manifest is not easily accessible from the SeaResultSet, so continue with the test.
+        # Note: the server might optimize results to fit into a single chunk, but the implementation
+        # is designed to handle multiple chunks by fetching additional chunks when needed.
+        logger.info("Proceeding with fetch operations...")
+
+        # Test fetchone
+        logger.info("Testing fetchone...")
+        row = cursor.fetchone()
+        logger.info(f"First row: {row}")
+
+        # Test fetchmany with a moderate size
+        fetch_size = 500
+        logger.info(f"Testing fetchmany({fetch_size})...")
+        rows = cursor.fetchmany(fetch_size)
+        logger.info(f"Fetched {len(rows)} rows with fetchmany")
+
+        # Test fetchall for the remaining rows
+        logger.info("Testing fetchall...")
+        remaining_rows = cursor.fetchall()
+        logger.info(f"Fetched {len(remaining_rows)} remaining rows with fetchall")
+
+        # Calculate total rows fetched
+        total_rows = 1 + len(rows) + len(remaining_rows)
+        logger.info(f"Total rows fetched: {total_rows}")
+
+        # Execute another query to test the Arrow fetch methods
+        logger.info("\nExecuting second query for Arrow testing: SELECT * FROM range(1, 20000) as id LIMIT 20000")
+        cursor.execute("SELECT * FROM range(1, 20000) as id LIMIT 20000")
+
+        try:
+            # Test fetchmany_arrow with a moderate size
+            arrow_fetch_size = 1000
+            logger.info(f"Testing fetchmany_arrow({arrow_fetch_size})...")
+            arrow_batch = cursor.fetchmany_arrow(arrow_fetch_size)
+            logger.info(f"Arrow batch num rows: {arrow_batch.num_rows}")
+            logger.info(f"Arrow batch columns: {arrow_batch.column_names}")
+
+            # Test fetchall_arrow
+            logger.info("Testing fetchall_arrow...")
+            remaining_arrow_batch = cursor.fetchall_arrow()
+            logger.info(f"Remaining arrow batch num rows: {remaining_arrow_batch.num_rows}")
+
+            # Calculate total rows fetched with Arrow
+            total_arrow_rows = arrow_batch.num_rows + remaining_arrow_batch.num_rows
+            logger.info(f"Total rows fetched with Arrow: {total_arrow_rows}")
+
+        except ImportError:
+            logger.warning("PyArrow not installed, skipping Arrow tests")
+
+        # Close cursor and connection
+        cursor.close()
+        connection.close()
+        logger.info("Successfully closed SEA session")
+
+    except Exception as e:
+        logger.error(f"Error during SEA result set test: {str(e)}")
+        import traceback
+        logger.error(traceback.format_exc())
+        sys.exit(1)
+
+    logger.info("SEA result set test with ARROW format and EXTERNAL_LINKS disposition completed successfully")
+
+
 if __name__ == "__main__":
     # Test session management
-    test_sea_session()
+    # test_sea_session()
 
     # Test result set implementation with metadata commands
-    test_sea_result_set_json_array_inline()
+    # test_sea_result_set_json_array_inline()
+
+    # Test result set implementation with ARROW format and EXTERNAL_LINKS disposition
+    test_sea_result_set_arrow_external_links()
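To try the new test locally, run python examples/experimental/sea_connector_test.py from the repo root with the environment set, or drive it from Python directly. A minimal sketch, with placeholder endpoint values (substitute your own workspace hostname, warehouse HTTP path, and token):

# All three values below are placeholders, not real credentials.
import os

os.environ["DATABRICKS_SERVER_HOSTNAME"] = "<workspace>.cloud.databricks.com"
os.environ["DATABRICKS_HTTP_PATH"] = "/sql/1.0/warehouses/<warehouse-id>"
os.environ["DATABRICKS_TOKEN"] = "<personal-access-token>"

# The test reads the variables at call time, so setting them first is enough.
# (Assumes the repo root is on sys.path so the examples package resolves.)
from examples.experimental.sea_connector_test import (
    test_sea_result_set_arrow_external_links,
)

test_sea_result_set_arrow_external_links()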

src/databricks/sql/backend/models/base.py

Lines changed: 11 additions & 2 deletions
@@ -34,6 +34,12 @@ class ExternalLink:
     external_link: str
     expiration: str
     chunk_index: int
+    byte_count: int = 0
+    row_count: int = 0
+    row_offset: int = 0
+    next_chunk_index: Optional[int] = None
+    next_chunk_internal_link: Optional[str] = None
+    http_headers: Optional[Dict[str, str]] = None
 
 
 @dataclass
@@ -61,8 +67,11 @@ class ColumnInfo:
 class ResultManifest:
     """Manifest information for a result set."""
 
-    schema: List[ColumnInfo]
+    format: str
+    schema: Dict[str, Any]  # Will contain column information
     total_row_count: int
     total_byte_count: int
+    total_chunk_count: int
     truncated: bool = False
-    chunk_count: Optional[int] = None
+    chunks: Optional[List[Dict[str, Any]]] = None
+    result_compression: Optional[str] = None
src/databricks/sql/backend/models/responses.py

Lines changed: 125 additions & 4 deletions
@@ -13,6 +13,8 @@
     ResultManifest,
     ResultData,
     ServiceError,
+    ExternalLink,
+    ColumnInfo,
 )
 
 
@@ -42,12 +44,55 @@ def from_dict(cls, data: Dict[str, Any]) -> "ExecuteStatementResponse":
             error=error,
             sql_state=status_data.get("sql_state"),
         )
+
+        # Parse manifest
+        manifest = None
+        if "manifest" in data:
+            manifest_data = data["manifest"]
+            manifest = ResultManifest(
+                format=manifest_data.get("format", ""),
+                schema=manifest_data.get("schema", {}),
+                total_row_count=manifest_data.get("total_row_count", 0),
+                total_byte_count=manifest_data.get("total_byte_count", 0),
+                total_chunk_count=manifest_data.get("total_chunk_count", 0),
+                truncated=manifest_data.get("truncated", False),
+                chunks=manifest_data.get("chunks"),
+                result_compression=manifest_data.get("result_compression"),
+            )
+
+        # Parse result data
+        result = None
+        if "result" in data:
+            result_data = data["result"]
+            external_links = None
+
+            if "external_links" in result_data:
+                external_links = []
+                for link_data in result_data["external_links"]:
+                    external_links.append(
+                        ExternalLink(
+                            external_link=link_data.get("external_link", ""),
+                            expiration=link_data.get("expiration", ""),
+                            chunk_index=link_data.get("chunk_index", 0),
+                            byte_count=link_data.get("byte_count", 0),
+                            row_count=link_data.get("row_count", 0),
+                            row_offset=link_data.get("row_offset", 0),
+                            next_chunk_index=link_data.get("next_chunk_index"),
+                            next_chunk_internal_link=link_data.get("next_chunk_internal_link"),
+                            http_headers=link_data.get("http_headers"),
+                        )
+                    )
+
+            result = ResultData(
+                data=result_data.get("data_array"),
+                external_links=external_links,
+            )
 
         return cls(
             statement_id=data.get("statement_id", ""),
             status=status,
-            manifest=data.get("manifest"),  # We'll parse this more fully if needed
-            result=data.get("result"),  # We'll parse this more fully if needed
+            manifest=manifest,
+            result=result,
         )
 
 
@@ -77,12 +122,55 @@ def from_dict(cls, data: Dict[str, Any]) -> "GetStatementResponse":
             error=error,
             sql_state=status_data.get("sql_state"),
         )
+
+        # Parse manifest
+        manifest = None
+        if "manifest" in data:
+            manifest_data = data["manifest"]
+            manifest = ResultManifest(
+                format=manifest_data.get("format", ""),
+                schema=manifest_data.get("schema", {}),
+                total_row_count=manifest_data.get("total_row_count", 0),
+                total_byte_count=manifest_data.get("total_byte_count", 0),
+                total_chunk_count=manifest_data.get("total_chunk_count", 0),
+                truncated=manifest_data.get("truncated", False),
+                chunks=manifest_data.get("chunks"),
+                result_compression=manifest_data.get("result_compression"),
+            )
+
+        # Parse result data
+        result = None
+        if "result" in data:
+            result_data = data["result"]
+            external_links = None
+
+            if "external_links" in result_data:
+                external_links = []
+                for link_data in result_data["external_links"]:
+                    external_links.append(
+                        ExternalLink(
+                            external_link=link_data.get("external_link", ""),
+                            expiration=link_data.get("expiration", ""),
+                            chunk_index=link_data.get("chunk_index", 0),
+                            byte_count=link_data.get("byte_count", 0),
+                            row_count=link_data.get("row_count", 0),
+                            row_offset=link_data.get("row_offset", 0),
+                            next_chunk_index=link_data.get("next_chunk_index"),
+                            next_chunk_internal_link=link_data.get("next_chunk_internal_link"),
+                            http_headers=link_data.get("http_headers"),
+                        )
+                    )
+
+            result = ResultData(
+                data=result_data.get("data_array"),
+                external_links=external_links,
+            )
 
         return cls(
             statement_id=data.get("statement_id", ""),
             status=status,
-            manifest=data.get("manifest"),  # We'll parse this more fully if needed
-            result=data.get("result"),  # We'll parse this more fully if needed
+            manifest=manifest,
+            result=result,
         )
 
 
@@ -96,3 +184,36 @@ class CreateSessionResponse:
     def from_dict(cls, data: Dict[str, Any]) -> "CreateSessionResponse":
         """Create a CreateSessionResponse from a dictionary."""
         return cls(session_id=data.get("session_id", ""))
+
+
+@dataclass
+class GetChunksResponse:
+    """Response from getting chunks for a statement."""
+
+    statement_id: str
+    external_links: List[ExternalLink]
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "GetChunksResponse":
+        """Create a GetChunksResponse from a dictionary."""
+        external_links = []
+        if "external_links" in data:
+            for link_data in data["external_links"]:
+                external_links.append(
+                    ExternalLink(
+                        external_link=link_data.get("external_link", ""),
+                        expiration=link_data.get("expiration", ""),
+                        chunk_index=link_data.get("chunk_index", 0),
+                        byte_count=link_data.get("byte_count", 0),
+                        row_count=link_data.get("row_count", 0),
+                        row_offset=link_data.get("row_offset", 0),
+                        next_chunk_index=link_data.get("next_chunk_index"),
+                        next_chunk_internal_link=link_data.get("next_chunk_internal_link"),
+                        http_headers=link_data.get("http_headers"),
+                    )
+                )
+
+        return cls(
+            statement_id=data.get("statement_id", ""),
+            external_links=external_links,
+        )