From a5c377048356e728b40e64604c8469fab756a370 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Tue, 14 Oct 2025 09:52:26 +0100 Subject: [PATCH 01/21] refactor(FIR-49930): decoupling query preprocessing logic --- src/firebolt/async_db/cursor.py | 124 +++--- src/firebolt/common/cursor/base_cursor.py | 41 +- src/firebolt/common/cursor/query_planners.py | 175 ++++++++ src/firebolt/db/cursor.py | 122 +++--- .../unit/common/cursor/test_query_planners.py | 374 ++++++++++++++++++ 5 files changed, 643 insertions(+), 193 deletions(-) create mode 100644 src/firebolt/common/cursor/query_planners.py create mode 100644 tests/unit/common/cursor/test_query_planners.py diff --git a/src/firebolt/async_db/cursor.py b/src/firebolt/async_db/cursor.py index dafc9a239f..29d013181a 100644 --- a/src/firebolt/async_db/cursor.py +++ b/src/firebolt/async_db/cursor.py @@ -25,7 +25,6 @@ UPDATE_ENDPOINT_HEADER, UPDATE_PARAMETERS_HEADER, CursorState, - ParameterStyle, ) from firebolt.common.cursor.base_cursor import ( BaseCursor, @@ -38,6 +37,10 @@ check_not_closed, check_query_executed, ) +from firebolt.common.cursor.query_planners import ( + ExecutionPlan, + QueryPlannerFactory, +) from firebolt.common.row_set.asynchronous.base import BaseAsyncRowSet from firebolt.common.row_set.asynchronous.in_memory import InMemoryAsyncRowSet from firebolt.common.row_set.asynchronous.streaming import StreamingAsyncRowSet @@ -222,110 +225,69 @@ async def _do_execute( from firebolt.async_db import paramstyle try: - parameter_style = ParameterStyle(paramstyle) - except ValueError: - raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") - try: - if parameter_style == ParameterStyle.FB_NUMERIC: - await self._execute_fb_numeric( - raw_query, parameters, timeout, async_execution, streaming - ) - else: - queries: List[Union[SetParameter, str]] = ( - [raw_query] - if skip_parsing - else self._formatter.split_format_sql(raw_query, parameters) - ) - timeout_controller = TimeoutController(timeout) - if len(queries) > 1 and async_execution: - raise FireboltError( - "Server side async does not support multi-statement queries" - ) - for query in queries: - await self._execute_single_query( - query, timeout_controller, async_execution, streaming - ) + planner = QueryPlannerFactory.create_planner(paramstyle, self._formatter) + + plan = planner.create_execution_plan( + raw_query, parameters, skip_parsing, async_execution, streaming + ) + await self._execute_plan(plan, timeout, async_execution, streaming) self._state = CursorState.DONE except Exception: self._state = CursorState.ERROR raise - async def _execute_fb_numeric( + async def _execute_plan( self, - query: str, - parameters: Sequence[Sequence[ParameterType]], + plan: ExecutionPlan, timeout: Optional[float], async_execution: bool, streaming: bool, ) -> None: - Cursor._log_query(query) + """Execute an execution plan.""" timeout_controller = TimeoutController(timeout) - timeout_controller.raise_if_timeout() - query_params = self._build_fb_numeric_query_params( - parameters, streaming, async_execution - ) - resp = await self._api_request( - query, - query_params, - timeout=timeout_controller.remaining(), - ) - await self._raise_if_error(resp) - if async_execution: - await resp.aread() - self._parse_async_response(resp) - else: - await self._parse_response_headers(resp.headers) - await self._append_row_set_from_response(resp) + + for query in plan.queries: + if isinstance(query, SetParameter): + if async_execution: + raise FireboltError( + "Server side async does not support set statements, " + "please use execute to set this parameter" + ) + await self._validate_set_parameter( + query, timeout_controller.remaining() + ) + else: + # Regular query execution + await self._execute_single_query( + query, + plan.query_params, + timeout_controller, + async_execution, + streaming, + ) async def _execute_single_query( self, - query: Union[SetParameter, str], + query: str, + query_params: Optional[Dict[str, Any]], timeout_controller: TimeoutController, async_execution: bool, streaming: bool, ) -> None: + """Execute a single query.""" start_time = time.time() Cursor._log_query(query) timeout_controller.raise_if_timeout() - if isinstance(query, SetParameter): - if async_execution: - raise FireboltError( - "Server side async does not support set statements, " - "please use execute to set this parameter" - ) - await self._validate_set_parameter(query, timeout_controller.remaining()) - else: - await self._handle_query_execution( - query, timeout_controller, async_execution, streaming - ) + final_params = query_params or {} - if not async_execution: - logger.info( - f"Query fetched {self.rowcount} rows in" - f" {time.time() - start_time} seconds." - ) - else: - logger.info("Query submitted for async execution.") - - async def _handle_query_execution( - self, - query: str, - timeout_controller: TimeoutController, - async_execution: bool, - streaming: bool, - ) -> None: - query_params: Dict[str, Any] = { - "output_format": self._get_output_format(streaming) - } - if async_execution: - query_params["async"] = True resp = await self._api_request( query, - query_params, + final_params, timeout=timeout_controller.remaining(), ) await self._raise_if_error(resp) + if async_execution: await resp.aread() self._parse_async_response(resp) @@ -333,6 +295,14 @@ async def _handle_query_execution( await self._parse_response_headers(resp.headers) await self._append_row_set_from_response(resp) + if not async_execution: + logger.info( + f"Query fetched {self.rowcount} rows in" + f" {time.time() - start_time} seconds." + ) + else: + logger.info("Query submitted for async execution.") + async def use_database(self, database: str, cache: bool = True) -> None: """Switch the current database context with caching.""" if cache: diff --git a/src/firebolt/common/cursor/base_cursor.py b/src/firebolt/common/cursor/base_cursor.py index 381e43a90f..8493788642 100644 --- a/src/firebolt/common/cursor/base_cursor.py +++ b/src/firebolt/common/cursor/base_cursor.py @@ -1,16 +1,15 @@ from __future__ import annotations -import json import logging import re from types import TracebackType -from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Tuple, Type, Union from httpx import URL, Response from firebolt.client.auth.base import Auth from firebolt.client.client import AsyncClient, Client -from firebolt.common._types import ParameterType, RawColType, SetParameter +from firebolt.common._types import RawColType, SetParameter from firebolt.common.constants import ( DISALLOWED_PARAMETER_LIST, IMMUTABLE_PARAMETER_LIST, @@ -238,42 +237,6 @@ def _log_query(query: Union[str, SetParameter]) -> None: ): logger.debug(f"Running query: {query}") - def _build_fb_numeric_query_params( - self, - parameters: Sequence[Sequence[ParameterType]], - streaming: bool, - async_execution: bool, - ) -> Dict[str, Any]: - """ - Build query parameters dictionary for fb_numeric paramstyle. - - Args: - parameters: A sequence of parameter sequences. For fb_numeric, - only the first parameter sequence is used. - streaming: Whether streaming is enabled - async_execution: Whether async execution is enabled - - Returns: - Dictionary of query parameters to send with the request - """ - param_list = parameters[0] if parameters else [] - query_parameters = [ - { - "name": f"${i+1}", - "value": self._formatter.convert_parameter_for_serialization(value), - } - for i, value in enumerate(param_list) - ] - - query_params: Dict[str, Any] = { - "output_format": self._get_output_format(streaming), - } - if query_parameters: - query_params["query_parameters"] = json.dumps(query_parameters) - if async_execution: - query_params["async"] = True - return query_params - @property def engine_name(self) -> str: """ diff --git a/src/firebolt/common/cursor/query_planners.py b/src/firebolt/common/cursor/query_planners.py new file mode 100644 index 0000000000..0736176789 --- /dev/null +++ b/src/firebolt/common/cursor/query_planners.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +"""Query planning handlers for different parameter styles.""" + +import json +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union + +from firebolt.common._types import ParameterType, SetParameter +from firebolt.common.constants import ( + JSON_LINES_OUTPUT_FORMAT, + JSON_OUTPUT_FORMAT, +) +from firebolt.utils.exception import FireboltError, ProgrammingError + +if TYPE_CHECKING: + from firebolt.common.statement_formatter import StatementFormatter + + +@dataclass +class ExecutionPlan: + """Represents a plan for executing queries.""" + + queries: List[Union[SetParameter, str]] + query_params: Optional[Dict[str, Any]] = None + is_multi_statement: bool = False + + +class BaseQueryPlanner(ABC): + """Base class for query planning handlers.""" + + def __init__(self, formatter: StatementFormatter) -> None: + """Initialize query planner with required dependencies.""" + self.formatter = formatter + + @abstractmethod + def create_execution_plan( + self, + raw_query: str, + parameters: Sequence[Sequence[ParameterType]], + skip_parsing: bool = False, + async_execution: bool = False, + streaming: bool = False, + ) -> ExecutionPlan: + """Create an execution plan for the given query and parameters.""" + + @staticmethod + def _get_output_format(streaming: bool) -> str: + """Get output format for the query.""" + if streaming: + return JSON_LINES_OUTPUT_FORMAT + return JSON_OUTPUT_FORMAT + + +class FbNumericQueryPlanner(BaseQueryPlanner): + """Query planner for fb_numeric parameter style.""" + + def create_execution_plan( + self, + raw_query: str, + parameters: Sequence[Sequence[ParameterType]], + skip_parsing: bool = False, + async_execution: bool = False, + streaming: bool = False, + ) -> ExecutionPlan: + """Create execution plan for fb_numeric parameter style.""" + query_params = self._build_fb_numeric_query_params( + parameters, streaming, async_execution + ) + return ExecutionPlan( + queries=[raw_query], query_params=query_params, is_multi_statement=False + ) + + def _build_fb_numeric_query_params( + self, + parameters: Sequence[Sequence[ParameterType]], + streaming: bool, + async_execution: bool, + ) -> Dict[str, Any]: + """Build query parameters for fb_numeric style.""" + param_list = parameters[0] if parameters else [] + query_parameters = [ + { + "name": f"${i+1}", + "value": self.formatter.convert_parameter_for_serialization(value), + } + for i, value in enumerate(param_list) + ] + + query_params: Dict[str, Any] = { + "output_format": self._get_output_format(streaming), + } + if query_parameters: + query_params["query_parameters"] = json.dumps(query_parameters) + if async_execution: + query_params["async"] = True + return query_params + + +class QmarkQueryPlanner(BaseQueryPlanner): + """Query planner for qmark parameter style.""" + + def create_execution_plan( + self, + raw_query: str, + parameters: Sequence[Sequence[ParameterType]], + skip_parsing: bool = False, + async_execution: bool = False, + streaming: bool = False, + ) -> ExecutionPlan: + """Create execution plan for qmark parameter style.""" + queries: List[Union[SetParameter, str]] = ( + [raw_query] + if skip_parsing + else self.formatter.split_format_sql(raw_query, parameters) + ) + + if len(queries) > 1 and async_execution: + raise FireboltError( + "Server side async does not support multi-statement queries" + ) + + # Build basic query parameters for qmark style + query_params: Dict[str, Any] = { + "output_format": self._get_output_format(streaming), + } + if async_execution: + query_params["async"] = True + + return ExecutionPlan( + queries=queries, + query_params=query_params, + is_multi_statement=len(queries) > 1, + ) + + +class QueryPlannerFactory: + """Factory for creating query planner instances based on paramstyle.""" + + _PLANNER_CLASSES = { + "fb_numeric": FbNumericQueryPlanner, + "qmark": QmarkQueryPlanner, + } + + @classmethod + def create_planner( + cls, paramstyle: str, formatter: StatementFormatter + ) -> BaseQueryPlanner: + """Create a query planner instance for the given paramstyle. + + Args: + paramstyle: The parameter style ('fb_numeric' or 'qmark') + formatter: StatementFormatter instance for query processing + + Returns: + Appropriate query planner instance + + Raises: + ProgrammingError: If paramstyle is not supported + """ + planner_class = cls._PLANNER_CLASSES.get(paramstyle) + if planner_class is None: + raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") + + return planner_class(formatter) + + @classmethod + def get_supported_paramstyles(cls) -> List[str]: + """Get list of supported parameter styles. + + Returns: + List of supported paramstyle strings + """ + return list(cls._PLANNER_CLASSES.keys()) diff --git a/src/firebolt/db/cursor.py b/src/firebolt/db/cursor.py index b8e1be9784..245a9816bc 100644 --- a/src/firebolt/db/cursor.py +++ b/src/firebolt/db/cursor.py @@ -33,7 +33,6 @@ UPDATE_ENDPOINT_HEADER, UPDATE_PARAMETERS_HEADER, CursorState, - ParameterStyle, ) from firebolt.common.cursor.base_cursor import ( BaseCursor, @@ -46,6 +45,10 @@ check_not_closed, check_query_executed, ) +from firebolt.common.cursor.query_planners import ( + ExecutionPlan, + QueryPlannerFactory, +) from firebolt.common.row_set.synchronous.base import BaseSyncRowSet from firebolt.common.row_set.synchronous.in_memory import InMemoryRowSet from firebolt.common.row_set.synchronous.streaming import StreamingRowSet @@ -228,110 +231,67 @@ def _do_execute( from firebolt.db import paramstyle try: - parameter_style = ParameterStyle(paramstyle) - except ValueError: - raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") - try: - if parameter_style == ParameterStyle.FB_NUMERIC: - self._execute_fb_numeric( - raw_query, parameters, timeout, async_execution, streaming - ) - else: - queries: List[Union[SetParameter, str]] = ( - [raw_query] - if skip_parsing - else self._formatter.split_format_sql(raw_query, parameters) - ) - timeout_controller = TimeoutController(timeout) - if len(queries) > 1 and async_execution: - raise FireboltError( - "Server side async does not support multi-statement queries" - ) - for query in queries: - self._execute_single_query( - query, timeout_controller, async_execution, streaming - ) + planner = QueryPlannerFactory.create_planner(paramstyle, self._formatter) + + plan = planner.create_execution_plan( + raw_query, parameters, skip_parsing, async_execution, streaming + ) + self._execute_plan(plan, timeout, async_execution, streaming) self._state = CursorState.DONE except Exception: self._state = CursorState.ERROR raise - def _execute_fb_numeric( + def _execute_plan( self, - query: str, - parameters: Sequence[Sequence[ParameterType]], + plan: ExecutionPlan, timeout: Optional[float], async_execution: bool, streaming: bool, ) -> None: - Cursor._log_query(query) + """Execute an execution plan.""" timeout_controller = TimeoutController(timeout) - timeout_controller.raise_if_timeout() - query_params = self._build_fb_numeric_query_params( - parameters, streaming, async_execution - ) - resp = self._api_request( - query, - query_params, - timeout=timeout_controller.remaining(), - ) - self._raise_if_error(resp) - if async_execution: - resp.read() - self._parse_async_response(resp) - else: - self._parse_response_headers(resp.headers) - self._append_row_set_from_response(resp) + + for query in plan.queries: + if isinstance(query, SetParameter): + if async_execution: + raise FireboltError( + "Server side async does not support set statements, " + "please use execute to set this parameter" + ) + self._validate_set_parameter(query, timeout_controller.remaining()) + else: + # Regular query execution + self._execute_single_query( + query, + plan.query_params, + timeout_controller, + async_execution, + streaming, + ) def _execute_single_query( self, - query: Union[SetParameter, str], + query: str, + query_params: Optional[Dict[str, Any]], timeout_controller: TimeoutController, async_execution: bool, streaming: bool, ) -> None: + """Execute a single query.""" start_time = time.time() Cursor._log_query(query) timeout_controller.raise_if_timeout() - if isinstance(query, SetParameter): - if async_execution: - raise FireboltError( - "Server side async does not support set statements, " - "please use execute to set this parameter" - ) - self._validate_set_parameter(query, timeout_controller.remaining()) - else: - self._handle_query_execution( - query, timeout_controller, async_execution, streaming - ) + final_params = query_params or {} - if not async_execution: - logger.info( - f"Query fetched {self.rowcount} rows in" - f" {time.time() - start_time} seconds." - ) - else: - logger.info("Query submitted for async execution.") - - def _handle_query_execution( - self, - query: str, - timeout_controller: TimeoutController, - async_execution: bool, - streaming: bool, - ) -> None: - query_params: Dict[str, Any] = { - "output_format": self._get_output_format(streaming) - } - if async_execution: - query_params["async"] = True resp = self._api_request( query, - query_params, + final_params, timeout=timeout_controller.remaining(), ) self._raise_if_error(resp) + if async_execution: resp.read() self._parse_async_response(resp) @@ -339,6 +299,14 @@ def _handle_query_execution( self._parse_response_headers(resp.headers) self._append_row_set_from_response(resp) + if not async_execution: + logger.info( + f"Query fetched {self.rowcount} rows in" + f" {time.time() - start_time} seconds." + ) + else: + logger.info("Query submitted for async execution.") + def use_database(self, database: str, cache: bool = True) -> None: """Switch the current database context with caching.""" if cache: diff --git a/tests/unit/common/cursor/test_query_planners.py b/tests/unit/common/cursor/test_query_planners.py new file mode 100644 index 0000000000..c90510c03b --- /dev/null +++ b/tests/unit/common/cursor/test_query_planners.py @@ -0,0 +1,374 @@ +"""Unit tests for query planners using plain functions and fixtures.""" + +import json +from unittest.mock import Mock + +import pytest + +from firebolt.common._types import SetParameter +from firebolt.common.constants import ( + JSON_LINES_OUTPUT_FORMAT, + JSON_OUTPUT_FORMAT, +) +from firebolt.common.cursor.query_planners import ( + BaseQueryPlanner, + ExecutionPlan, + FbNumericQueryPlanner, + QmarkQueryPlanner, + QueryPlannerFactory, +) +from firebolt.common.statement_formatter import create_statement_formatter +from firebolt.utils.exception import FireboltError, ProgrammingError + + +# Fixtures +@pytest.fixture +def formatter(): + """Create statement formatter for tests.""" + return create_statement_formatter(version=1) + + +@pytest.fixture +def fb_numeric_planner(formatter): + """Create FbNumericQueryPlanner for tests.""" + return FbNumericQueryPlanner(formatter) + + +@pytest.fixture +def qmark_planner(formatter): + """Create QmarkQueryPlanner for tests.""" + return QmarkQueryPlanner(formatter) + + +# ExecutionPlan tests +def test_execution_plan_creation(): + """Test basic ExecutionPlan creation.""" + plan = ExecutionPlan( + queries=["SELECT 1"], + query_params={"output_format": "JSON_Compact"}, + is_multi_statement=False, + ) + assert plan.queries == ["SELECT 1"] + assert plan.query_params == {"output_format": "JSON_Compact"} + assert plan.is_multi_statement is False + + +def test_execution_plan_defaults(): + """Test ExecutionPlan default values.""" + plan = ExecutionPlan(queries=["SELECT 1"]) + assert plan.queries == ["SELECT 1"] + assert plan.query_params is None + assert plan.is_multi_statement is False + + +# BaseQueryPlanner tests +@pytest.mark.parametrize( + "streaming,expected_format", + [ + (True, JSON_LINES_OUTPUT_FORMAT), + (False, JSON_OUTPUT_FORMAT), + ], +) +def test_get_output_format(streaming, expected_format): + """Test output format selection.""" + assert BaseQueryPlanner._get_output_format(streaming) == expected_format + + +# FbNumericQueryPlanner tests +def test_fb_numeric_planner_initialization(formatter): + """Test planner initialization.""" + planner = FbNumericQueryPlanner(formatter) + assert planner.formatter == formatter + + +def test_fb_numeric_basic_execution_plan(fb_numeric_planner): + """Test basic execution plan creation.""" + plan = fb_numeric_planner.create_execution_plan("SELECT $1", [[42]]) + + assert len(plan.queries) == 1 + assert plan.queries[0] == "SELECT $1" + assert plan.is_multi_statement is False + assert plan.query_params is not None + assert "output_format" in plan.query_params + assert plan.query_params["output_format"] == JSON_OUTPUT_FORMAT + + +def test_fb_numeric_execution_plan_with_parameters(fb_numeric_planner): + """Test execution plan with parameters.""" + parameters = [[42, "test", True]] + plan = fb_numeric_planner.create_execution_plan("SELECT $1, $2, $3", parameters) + + assert plan.query_params is not None + assert "query_parameters" in plan.query_params + + # Parse the JSON to verify structure + query_params = json.loads(plan.query_params["query_parameters"]) + assert len(query_params) == 3 + assert query_params[0] == {"name": "$1", "value": 42} + assert query_params[1] == {"name": "$2", "value": "test"} + assert query_params[2] == {"name": "$3", "value": True} + + +def test_fb_numeric_execution_plan_no_parameters(fb_numeric_planner): + """Test execution plan without parameters.""" + plan = fb_numeric_planner.create_execution_plan("SELECT 1", []) + + assert plan.query_params is not None + assert "query_parameters" not in plan.query_params + assert plan.query_params["output_format"] == JSON_OUTPUT_FORMAT + + +@pytest.mark.parametrize( + "streaming,async_execution,expected_format,expected_async", + [ + (True, False, JSON_LINES_OUTPUT_FORMAT, None), + (False, True, JSON_OUTPUT_FORMAT, True), + (True, True, JSON_LINES_OUTPUT_FORMAT, True), + (False, False, JSON_OUTPUT_FORMAT, None), + ], +) +def test_fb_numeric_execution_plan_options( + fb_numeric_planner, streaming, async_execution, expected_format, expected_async +): + """Test execution plan with various streaming and async options.""" + plan = fb_numeric_planner.create_execution_plan( + "SELECT $1", [[42]], streaming=streaming, async_execution=async_execution + ) + + assert plan.query_params["output_format"] == expected_format + if expected_async: + assert plan.query_params["async"] is True + else: + assert "async" not in plan.query_params + + +def test_fb_numeric_execution_plan_skip_parsing_ignored(fb_numeric_planner): + """Test that skip_parsing is ignored for fb_numeric style.""" + plan = fb_numeric_planner.create_execution_plan( + "SELECT $1", [[42]], skip_parsing=True + ) + + # skip_parsing should be ignored for fb_numeric + assert len(plan.queries) == 1 + assert plan.queries[0] == "SELECT $1" + assert plan.is_multi_statement is False + + +def test_fb_numeric_complex_parameters(fb_numeric_planner): + """Test execution plan with complex parameter types.""" + parameters = [[42, "string", True, None, 3.14, [1, 2, 3]]] + plan = fb_numeric_planner.create_execution_plan( + "SELECT $1, $2, $3, $4, $5, $6", parameters + ) + + query_params = json.loads(plan.query_params["query_parameters"]) + assert len(query_params) == 6 + assert query_params[0]["value"] == 42 + assert query_params[1]["value"] == "string" + assert query_params[2]["value"] is True + assert query_params[3]["value"] is None + assert abs(query_params[4]["value"] - 3.14) < 0.001 + + +# QmarkQueryPlanner tests +def test_qmark_planner_initialization(formatter): + """Test planner initialization.""" + planner = QmarkQueryPlanner(formatter) + assert planner.formatter == formatter + + +def test_qmark_planner_initialization(formatter): + """Test planner initialization.""" + planner = QmarkQueryPlanner(formatter) + assert planner.formatter == formatter + + +def test_qmark_basic_execution_plan(qmark_planner): + """Test basic execution plan creation.""" + plan = qmark_planner.create_execution_plan("SELECT ?", [[42]]) + + assert len(plan.queries) >= 1 # Could be split by formatter + assert plan.query_params is not None + assert "output_format" in plan.query_params + assert plan.query_params["output_format"] == JSON_OUTPUT_FORMAT + + +def test_qmark_execution_plan_skip_parsing(qmark_planner): + """Test execution plan with skip_parsing enabled.""" + plan = qmark_planner.create_execution_plan( + "SELECT ?; SELECT ?", [[42]], skip_parsing=True + ) + + # With skip_parsing, should not split the query + assert len(plan.queries) == 1 + assert plan.queries[0] == "SELECT ?; SELECT ?" + assert plan.is_multi_statement is False + + +@pytest.mark.parametrize( + "streaming,async_execution,expected_format,expected_async", + [ + (True, False, JSON_LINES_OUTPUT_FORMAT, None), + (False, True, JSON_OUTPUT_FORMAT, True), + (True, True, JSON_LINES_OUTPUT_FORMAT, True), + (False, False, JSON_OUTPUT_FORMAT, None), + ], +) +def test_qmark_execution_plan_options( + qmark_planner, streaming, async_execution, expected_format, expected_async +): + """Test execution plan with various streaming and async options.""" + plan = qmark_planner.create_execution_plan( + "SELECT ?", [[42]], streaming=streaming, async_execution=async_execution + ) + + assert plan.query_params["output_format"] == expected_format + if expected_async: + assert plan.query_params["async"] is True + else: + assert "async" not in plan.query_params + + +def test_qmark_multi_statement_async_error(qmark_planner): + """Test that multi-statement queries with async raise error.""" + # Mock the formatter to return multiple queries + qmark_planner.formatter.split_format_sql = Mock( + return_value=["SELECT 1", "SELECT 2"] + ) + + with pytest.raises( + FireboltError, + match="Server side async does not support multi-statement queries", + ): + qmark_planner.create_execution_plan( + "SELECT 1; SELECT 2", [[]], async_execution=True + ) + + +@pytest.mark.parametrize( + "queries,expected_multi", + [ + (["SELECT 1"], False), + (["SELECT 1", "SELECT 2"], True), + ], +) +def test_qmark_multi_statement_detection(qmark_planner, queries, expected_multi): + """Test multi-statement detection.""" + # Mock the formatter to return queries + qmark_planner.formatter.split_format_sql = Mock(return_value=queries) + + plan = qmark_planner.create_execution_plan("SELECT 1; SELECT 2", [[]]) + + assert plan.is_multi_statement is expected_multi + assert len(plan.queries) == len(queries) + + +def test_qmark_set_parameter_handling(qmark_planner): + """Test handling of SET parameters.""" + set_param = SetParameter("test_param", "test_value") + qmark_planner.formatter.split_format_sql = Mock( + return_value=[set_param, "SELECT 1"] + ) + + plan = qmark_planner.create_execution_plan( + "SET test_param = test_value; SELECT 1", [[]] + ) + + assert plan.is_multi_statement is True + assert len(plan.queries) == 2 + assert isinstance(plan.queries[0], SetParameter) + assert plan.queries[1] == "SELECT 1" + + +# QueryPlannerFactory tests +@pytest.mark.parametrize( + "paramstyle,expected_class", + [ + ("fb_numeric", FbNumericQueryPlanner), + ("qmark", QmarkQueryPlanner), + ], +) +def test_query_planner_factory_creates_correct_planners( + formatter, paramstyle, expected_class +): + """Test that factory creates correct planner types.""" + planner = QueryPlannerFactory.create_planner(paramstyle, formatter) + + assert isinstance(planner, expected_class) + assert planner.formatter == formatter + + +def test_query_planner_factory_unsupported_paramstyle(formatter): + """Test error for unsupported paramstyle.""" + with pytest.raises(ProgrammingError, match="Unsupported paramstyle: unsupported"): + QueryPlannerFactory.create_planner("unsupported", formatter) + + +def test_query_planner_factory_get_supported_paramstyles(): + """Test getting supported parameter styles.""" + supported = QueryPlannerFactory.get_supported_paramstyles() + + assert isinstance(supported, list) + assert "fb_numeric" in supported + assert "qmark" in supported + assert len(supported) == 2 + + +# Edge cases and error conditions +def test_fb_numeric_empty_parameters_list(formatter): + """Test fb_numeric with empty parameters list.""" + planner = FbNumericQueryPlanner(formatter) + plan = planner.create_execution_plan("SELECT 1", []) + + assert "query_parameters" not in plan.query_params + assert plan.query_params["output_format"] == JSON_OUTPUT_FORMAT + + +def test_fb_numeric_none_parameters(formatter): + """Test fb_numeric with None in parameters.""" + planner = FbNumericQueryPlanner(formatter) + plan = planner.create_execution_plan("SELECT $1, $2", [[None, "test"]]) + + query_params = json.loads(plan.query_params["query_parameters"]) + assert query_params[0]["value"] is None + assert query_params[1]["value"] == "test" + + +def test_qmark_empty_query(formatter): + """Test qmark with empty query.""" + planner = QmarkQueryPlanner(formatter) + plan = planner.create_execution_plan("", [[]]) + + assert plan.query_params is not None + assert "output_format" in plan.query_params + + +@pytest.mark.parametrize("invalid_paramstyle", ["FB_NUMERIC", "QMARK", "invalid"]) +def test_factory_case_sensitivity(formatter, invalid_paramstyle): + """Test factory case sensitivity.""" + with pytest.raises(ProgrammingError): + QueryPlannerFactory.create_planner(invalid_paramstyle, formatter) + + +def test_execution_plan_immutability(): + """Test that ExecutionPlan behaves correctly as a dataclass.""" + plan = ExecutionPlan(queries=["SELECT 1"]) + + # Test that we can create multiple instances + plan2 = ExecutionPlan(queries=["SELECT 2"], is_multi_statement=True) + + assert plan.queries != plan2.queries + assert plan.is_multi_statement != plan2.is_multi_statement + + +def test_supported_paramstyles_immutability(): + """Test that get_supported_paramstyles returns a copy.""" + styles1 = QueryPlannerFactory.get_supported_paramstyles() + styles2 = QueryPlannerFactory.get_supported_paramstyles() + + # Modify one list + styles1.append("test") + + # Should not affect the other + assert "test" not in styles2 + assert len(styles2) == 2 From 72969a5590fcf0a18d7ba94f3b00cbc4f77c1f96 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Tue, 14 Oct 2025 10:41:29 +0100 Subject: [PATCH 02/21] fix test --- .../resource_manager/V2/test_engine.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/integration/resource_manager/V2/test_engine.py b/tests/integration/resource_manager/V2/test_engine.py index 96f43bd213..3b21ff83e5 100644 --- a/tests/integration/resource_manager/V2/test_engine.py +++ b/tests/integration/resource_manager/V2/test_engine.py @@ -27,29 +27,31 @@ def test_create_start_stop_engine( auto_stop=120, ) assert engine.name == name + database = None try: database = rm.databases.create(name=name) assert database.name == name - try: - engine.attach_to_database(database) - assert engine.database == database + engine.attach_to_database(database) + assert engine.database == database - engine.start() - assert engine.current_status == EngineStatus.RUNNING + engine.start() + assert engine.current_status == EngineStatus.RUNNING - engine.stop() - assert engine.current_status in { - EngineStatus.STOPPING, - EngineStatus.STOPPED, - } - finally: - database.delete() + engine.stop() + assert engine.current_status in { + EngineStatus.STOPPING, + EngineStatus.STOPPED, + } finally: - engine.stop() - engine.delete() + # Engine needs to be deleted first + if engine: + engine.stop() + engine.delete() + if database: + database.delete() ParamValue = namedtuple("ParamValue", "set expected") From 7979917900579716cddd75cc693cd5a7c4c5c5a3 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Tue, 14 Oct 2025 13:15:58 +0100 Subject: [PATCH 03/21] remove unused method --- src/firebolt/common/cursor/query_planners.py | 9 -------- .../unit/common/cursor/test_query_planners.py | 23 ------------------- 2 files changed, 32 deletions(-) diff --git a/src/firebolt/common/cursor/query_planners.py b/src/firebolt/common/cursor/query_planners.py index 0736176789..57505ba694 100644 --- a/src/firebolt/common/cursor/query_planners.py +++ b/src/firebolt/common/cursor/query_planners.py @@ -164,12 +164,3 @@ def create_planner( raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") return planner_class(formatter) - - @classmethod - def get_supported_paramstyles(cls) -> List[str]: - """Get list of supported parameter styles. - - Returns: - List of supported paramstyle strings - """ - return list(cls._PLANNER_CLASSES.keys()) diff --git a/tests/unit/common/cursor/test_query_planners.py b/tests/unit/common/cursor/test_query_planners.py index c90510c03b..f8968ff439 100644 --- a/tests/unit/common/cursor/test_query_planners.py +++ b/tests/unit/common/cursor/test_query_planners.py @@ -304,16 +304,6 @@ def test_query_planner_factory_unsupported_paramstyle(formatter): QueryPlannerFactory.create_planner("unsupported", formatter) -def test_query_planner_factory_get_supported_paramstyles(): - """Test getting supported parameter styles.""" - supported = QueryPlannerFactory.get_supported_paramstyles() - - assert isinstance(supported, list) - assert "fb_numeric" in supported - assert "qmark" in supported - assert len(supported) == 2 - - # Edge cases and error conditions def test_fb_numeric_empty_parameters_list(formatter): """Test fb_numeric with empty parameters list.""" @@ -359,16 +349,3 @@ def test_execution_plan_immutability(): assert plan.queries != plan2.queries assert plan.is_multi_statement != plan2.is_multi_statement - - -def test_supported_paramstyles_immutability(): - """Test that get_supported_paramstyles returns a copy.""" - styles1 = QueryPlannerFactory.get_supported_paramstyles() - styles2 = QueryPlannerFactory.get_supported_paramstyles() - - # Modify one list - styles1.append("test") - - # Should not affect the other - assert "test" not in styles2 - assert len(styles2) == 2 From bf217d9fde9a1f036887f3ae8ea0fa8a95699ed7 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Tue, 14 Oct 2025 13:28:33 +0100 Subject: [PATCH 04/21] renames --- src/firebolt/async_db/cursor.py | 10 ++-- ...uery_planners.py => statement_planners.py} | 34 +++++------ src/firebolt/db/cursor.py | 10 ++-- ...planners.py => test_statement_planners.py} | 56 +++++++++---------- 4 files changed, 57 insertions(+), 53 deletions(-) rename src/firebolt/common/cursor/{query_planners.py => statement_planners.py} (82%) rename tests/unit/common/cursor/{test_query_planners.py => test_statement_planners.py} (88%) diff --git a/src/firebolt/async_db/cursor.py b/src/firebolt/async_db/cursor.py index 29d013181a..541124e89d 100644 --- a/src/firebolt/async_db/cursor.py +++ b/src/firebolt/async_db/cursor.py @@ -37,9 +37,9 @@ check_not_closed, check_query_executed, ) -from firebolt.common.cursor.query_planners import ( +from firebolt.common.cursor.statement_planners import ( ExecutionPlan, - QueryPlannerFactory, + StatementPlannerFactory, ) from firebolt.common.row_set.asynchronous.base import BaseAsyncRowSet from firebolt.common.row_set.asynchronous.in_memory import InMemoryAsyncRowSet @@ -225,9 +225,11 @@ async def _do_execute( from firebolt.async_db import paramstyle try: - planner = QueryPlannerFactory.create_planner(paramstyle, self._formatter) + statement_planner = StatementPlannerFactory.create_planner( + paramstyle, self._formatter + ) - plan = planner.create_execution_plan( + plan = statement_planner.create_execution_plan( raw_query, parameters, skip_parsing, async_execution, streaming ) await self._execute_plan(plan, timeout, async_execution, streaming) diff --git a/src/firebolt/common/cursor/query_planners.py b/src/firebolt/common/cursor/statement_planners.py similarity index 82% rename from src/firebolt/common/cursor/query_planners.py rename to src/firebolt/common/cursor/statement_planners.py index 57505ba694..050b0d4468 100644 --- a/src/firebolt/common/cursor/query_planners.py +++ b/src/firebolt/common/cursor/statement_planners.py @@ -1,6 +1,6 @@ from __future__ import annotations -"""Query planning handlers for different parameter styles.""" +"""Statement planning handlers for different parameter styles.""" import json from abc import ABC, abstractmethod @@ -27,11 +27,11 @@ class ExecutionPlan: is_multi_statement: bool = False -class BaseQueryPlanner(ABC): - """Base class for query planning handlers.""" +class BaseStatementPlanner(ABC): + """Base class for statement planning handlers.""" def __init__(self, formatter: StatementFormatter) -> None: - """Initialize query planner with required dependencies.""" + """Initialize statement planner with required dependencies.""" self.formatter = formatter @abstractmethod @@ -43,7 +43,7 @@ def create_execution_plan( async_execution: bool = False, streaming: bool = False, ) -> ExecutionPlan: - """Create an execution plan for the given query and parameters.""" + """Create an execution plan for the given statement and parameters.""" @staticmethod def _get_output_format(streaming: bool) -> str: @@ -53,8 +53,8 @@ def _get_output_format(streaming: bool) -> str: return JSON_OUTPUT_FORMAT -class FbNumericQueryPlanner(BaseQueryPlanner): - """Query planner for fb_numeric parameter style.""" +class FbNumericStatementPlanner(BaseStatementPlanner): + """Statement planner for fb_numeric parameter style.""" def create_execution_plan( self, @@ -98,8 +98,8 @@ def _build_fb_numeric_query_params( return query_params -class QmarkQueryPlanner(BaseQueryPlanner): - """Query planner for qmark parameter style.""" +class QmarkStatementPlanner(BaseStatementPlanner): + """Statement planner for qmark parameter style.""" def create_execution_plan( self, @@ -135,26 +135,26 @@ def create_execution_plan( ) -class QueryPlannerFactory: - """Factory for creating query planner instances based on paramstyle.""" +class StatementPlannerFactory: + """Factory for creating statement planner instances based on paramstyle.""" _PLANNER_CLASSES = { - "fb_numeric": FbNumericQueryPlanner, - "qmark": QmarkQueryPlanner, + "fb_numeric": FbNumericStatementPlanner, + "qmark": QmarkStatementPlanner, } @classmethod def create_planner( cls, paramstyle: str, formatter: StatementFormatter - ) -> BaseQueryPlanner: - """Create a query planner instance for the given paramstyle. + ) -> BaseStatementPlanner: + """Create a statement planner instance for the given paramstyle. Args: paramstyle: The parameter style ('fb_numeric' or 'qmark') - formatter: StatementFormatter instance for query processing + formatter: StatementFormatter instance for statement processing Returns: - Appropriate query planner instance + Appropriate statement planner instance Raises: ProgrammingError: If paramstyle is not supported diff --git a/src/firebolt/db/cursor.py b/src/firebolt/db/cursor.py index 245a9816bc..5f464b7fef 100644 --- a/src/firebolt/db/cursor.py +++ b/src/firebolt/db/cursor.py @@ -45,9 +45,9 @@ check_not_closed, check_query_executed, ) -from firebolt.common.cursor.query_planners import ( +from firebolt.common.cursor.statement_planners import ( ExecutionPlan, - QueryPlannerFactory, + StatementPlannerFactory, ) from firebolt.common.row_set.synchronous.base import BaseSyncRowSet from firebolt.common.row_set.synchronous.in_memory import InMemoryRowSet @@ -231,9 +231,11 @@ def _do_execute( from firebolt.db import paramstyle try: - planner = QueryPlannerFactory.create_planner(paramstyle, self._formatter) + statement_planner = StatementPlannerFactory.create_planner( + paramstyle, self._formatter + ) - plan = planner.create_execution_plan( + plan = statement_planner.create_execution_plan( raw_query, parameters, skip_parsing, async_execution, streaming ) self._execute_plan(plan, timeout, async_execution, streaming) diff --git a/tests/unit/common/cursor/test_query_planners.py b/tests/unit/common/cursor/test_statement_planners.py similarity index 88% rename from tests/unit/common/cursor/test_query_planners.py rename to tests/unit/common/cursor/test_statement_planners.py index f8968ff439..368fd51c6d 100644 --- a/tests/unit/common/cursor/test_query_planners.py +++ b/tests/unit/common/cursor/test_statement_planners.py @@ -1,4 +1,4 @@ -"""Unit tests for query planners using plain functions and fixtures.""" +"""Unit tests for statement planners using plain functions and fixtures.""" import json from unittest.mock import Mock @@ -10,12 +10,12 @@ JSON_LINES_OUTPUT_FORMAT, JSON_OUTPUT_FORMAT, ) -from firebolt.common.cursor.query_planners import ( - BaseQueryPlanner, +from firebolt.common.cursor.statement_planners import ( + BaseStatementPlanner, ExecutionPlan, - FbNumericQueryPlanner, - QmarkQueryPlanner, - QueryPlannerFactory, + FbNumericStatementPlanner, + QmarkStatementPlanner, + StatementPlannerFactory, ) from firebolt.common.statement_formatter import create_statement_formatter from firebolt.utils.exception import FireboltError, ProgrammingError @@ -30,14 +30,14 @@ def formatter(): @pytest.fixture def fb_numeric_planner(formatter): - """Create FbNumericQueryPlanner for tests.""" - return FbNumericQueryPlanner(formatter) + """Create FbNumericStatementPlanner for tests.""" + return FbNumericStatementPlanner(formatter) @pytest.fixture def qmark_planner(formatter): - """Create QmarkQueryPlanner for tests.""" - return QmarkQueryPlanner(formatter) + """Create QmarkStatementPlanner for tests.""" + return QmarkStatementPlanner(formatter) # ExecutionPlan tests @@ -61,7 +61,7 @@ def test_execution_plan_defaults(): assert plan.is_multi_statement is False -# BaseQueryPlanner tests +# BaseStatementPlanner tests @pytest.mark.parametrize( "streaming,expected_format", [ @@ -71,13 +71,13 @@ def test_execution_plan_defaults(): ) def test_get_output_format(streaming, expected_format): """Test output format selection.""" - assert BaseQueryPlanner._get_output_format(streaming) == expected_format + assert BaseStatementPlanner._get_output_format(streaming) == expected_format -# FbNumericQueryPlanner tests +# FbNumericStatementPlanner tests def test_fb_numeric_planner_initialization(formatter): """Test planner initialization.""" - planner = FbNumericQueryPlanner(formatter) + planner = FbNumericStatementPlanner(formatter) assert planner.formatter == formatter @@ -170,16 +170,16 @@ def test_fb_numeric_complex_parameters(fb_numeric_planner): assert abs(query_params[4]["value"] - 3.14) < 0.001 -# QmarkQueryPlanner tests +# QmarkStatementPlanner tests def test_qmark_planner_initialization(formatter): """Test planner initialization.""" - planner = QmarkQueryPlanner(formatter) + planner = QmarkStatementPlanner(formatter) assert planner.formatter == formatter def test_qmark_planner_initialization(formatter): """Test planner initialization.""" - planner = QmarkQueryPlanner(formatter) + planner = QmarkStatementPlanner(formatter) assert planner.formatter == formatter @@ -280,34 +280,34 @@ def test_qmark_set_parameter_handling(qmark_planner): assert plan.queries[1] == "SELECT 1" -# QueryPlannerFactory tests +# StatementPlannerFactory tests @pytest.mark.parametrize( "paramstyle,expected_class", [ - ("fb_numeric", FbNumericQueryPlanner), - ("qmark", QmarkQueryPlanner), + ("fb_numeric", FbNumericStatementPlanner), + ("qmark", QmarkStatementPlanner), ], ) -def test_query_planner_factory_creates_correct_planners( +def test_statement_planner_factory_creates_correct_planners( formatter, paramstyle, expected_class ): """Test that factory creates correct planner types.""" - planner = QueryPlannerFactory.create_planner(paramstyle, formatter) + planner = StatementPlannerFactory.create_planner(paramstyle, formatter) assert isinstance(planner, expected_class) assert planner.formatter == formatter -def test_query_planner_factory_unsupported_paramstyle(formatter): +def test_statement_planner_factory_unsupported_paramstyle(formatter): """Test error for unsupported paramstyle.""" with pytest.raises(ProgrammingError, match="Unsupported paramstyle: unsupported"): - QueryPlannerFactory.create_planner("unsupported", formatter) + StatementPlannerFactory.create_planner("unsupported", formatter) # Edge cases and error conditions def test_fb_numeric_empty_parameters_list(formatter): """Test fb_numeric with empty parameters list.""" - planner = FbNumericQueryPlanner(formatter) + planner = FbNumericStatementPlanner(formatter) plan = planner.create_execution_plan("SELECT 1", []) assert "query_parameters" not in plan.query_params @@ -316,7 +316,7 @@ def test_fb_numeric_empty_parameters_list(formatter): def test_fb_numeric_none_parameters(formatter): """Test fb_numeric with None in parameters.""" - planner = FbNumericQueryPlanner(formatter) + planner = FbNumericStatementPlanner(formatter) plan = planner.create_execution_plan("SELECT $1, $2", [[None, "test"]]) query_params = json.loads(plan.query_params["query_parameters"]) @@ -326,7 +326,7 @@ def test_fb_numeric_none_parameters(formatter): def test_qmark_empty_query(formatter): """Test qmark with empty query.""" - planner = QmarkQueryPlanner(formatter) + planner = QmarkStatementPlanner(formatter) plan = planner.create_execution_plan("", [[]]) assert plan.query_params is not None @@ -337,7 +337,7 @@ def test_qmark_empty_query(formatter): def test_factory_case_sensitivity(formatter, invalid_paramstyle): """Test factory case sensitivity.""" with pytest.raises(ProgrammingError): - QueryPlannerFactory.create_planner(invalid_paramstyle, formatter) + StatementPlannerFactory.create_planner(invalid_paramstyle, formatter) def test_execution_plan_immutability(): From 4ca3af312a8e5c14cdb9a577385dac324f10cb33 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Tue, 14 Oct 2025 13:45:16 +0100 Subject: [PATCH 05/21] refactor plan to store async and streaming --- src/firebolt/async_db/cursor.py | 10 ++++------ src/firebolt/common/cursor/statement_planners.py | 10 +++++++++- src/firebolt/db/cursor.py | 10 ++++------ 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/firebolt/async_db/cursor.py b/src/firebolt/async_db/cursor.py index 541124e89d..a03b1eecca 100644 --- a/src/firebolt/async_db/cursor.py +++ b/src/firebolt/async_db/cursor.py @@ -232,7 +232,7 @@ async def _do_execute( plan = statement_planner.create_execution_plan( raw_query, parameters, skip_parsing, async_execution, streaming ) - await self._execute_plan(plan, timeout, async_execution, streaming) + await self._execute_plan(plan, timeout) self._state = CursorState.DONE except Exception: self._state = CursorState.ERROR @@ -242,15 +242,13 @@ async def _execute_plan( self, plan: ExecutionPlan, timeout: Optional[float], - async_execution: bool, - streaming: bool, ) -> None: """Execute an execution plan.""" timeout_controller = TimeoutController(timeout) for query in plan.queries: if isinstance(query, SetParameter): - if async_execution: + if plan.async_execution: raise FireboltError( "Server side async does not support set statements, " "please use execute to set this parameter" @@ -264,8 +262,8 @@ async def _execute_plan( query, plan.query_params, timeout_controller, - async_execution, - streaming, + plan.async_execution, + plan.streaming, ) async def _execute_single_query( diff --git a/src/firebolt/common/cursor/statement_planners.py b/src/firebolt/common/cursor/statement_planners.py index 050b0d4468..a24d5f92b2 100644 --- a/src/firebolt/common/cursor/statement_planners.py +++ b/src/firebolt/common/cursor/statement_planners.py @@ -25,6 +25,8 @@ class ExecutionPlan: queries: List[Union[SetParameter, str]] query_params: Optional[Dict[str, Any]] = None is_multi_statement: bool = False + async_execution: bool = False + streaming: bool = False class BaseStatementPlanner(ABC): @@ -69,7 +71,11 @@ def create_execution_plan( parameters, streaming, async_execution ) return ExecutionPlan( - queries=[raw_query], query_params=query_params, is_multi_statement=False + queries=[raw_query], + query_params=query_params, + is_multi_statement=False, + async_execution=async_execution, + streaming=streaming, ) def _build_fb_numeric_query_params( @@ -132,6 +138,8 @@ def create_execution_plan( queries=queries, query_params=query_params, is_multi_statement=len(queries) > 1, + async_execution=async_execution, + streaming=streaming, ) diff --git a/src/firebolt/db/cursor.py b/src/firebolt/db/cursor.py index 5f464b7fef..0d6deab171 100644 --- a/src/firebolt/db/cursor.py +++ b/src/firebolt/db/cursor.py @@ -238,7 +238,7 @@ def _do_execute( plan = statement_planner.create_execution_plan( raw_query, parameters, skip_parsing, async_execution, streaming ) - self._execute_plan(plan, timeout, async_execution, streaming) + self._execute_plan(plan, timeout) self._state = CursorState.DONE except Exception: self._state = CursorState.ERROR @@ -248,15 +248,13 @@ def _execute_plan( self, plan: ExecutionPlan, timeout: Optional[float], - async_execution: bool, - streaming: bool, ) -> None: """Execute an execution plan.""" timeout_controller = TimeoutController(timeout) for query in plan.queries: if isinstance(query, SetParameter): - if async_execution: + if plan.async_execution: raise FireboltError( "Server side async does not support set statements, " "please use execute to set this parameter" @@ -268,8 +266,8 @@ def _execute_plan( query, plan.query_params, timeout_controller, - async_execution, - streaming, + plan.async_execution, + plan.streaming, ) def _execute_single_query( From 967e28a1d493f8266545bbdd9bd731266293df2d Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 09:53:20 +0000 Subject: [PATCH 06/21] feat: add bulk_insert parameter to executemany for improved INSERT performance - Add bulk_insert flag to executemany() in both sync and async cursors - Concatenate multiple INSERT queries with semicolons when bulk_insert=True - Send with merge_prepared_statement_batches=true query parameter - Support both QMARK and FB_NUMERIC parameter styles - Add validation to ensure only INSERT statements use bulk_insert - Add comprehensive unit tests for both sync and async cursors - Add integration tests for V2 (sync and async) - Add documentation with examples for both parameter styles Co-Authored-By: petro.tiurin@firebolt.io --- docsrc/Connecting_and_queries.rst | 53 +++++++ src/firebolt/async_db/cursor.py | 127 +++++++++++++++++ src/firebolt/db/cursor.py | 125 +++++++++++++++++ .../dbapi/async/V2/test_queries_async.py | 52 +++++++ .../integration/dbapi/sync/V2/test_queries.py | 52 +++++++ tests/unit/async_db/test_cursor.py | 131 ++++++++++++++++++ tests/unit/db/test_cursor.py | 130 +++++++++++++++++ 7 files changed, 670 insertions(+) diff --git a/docsrc/Connecting_and_queries.rst b/docsrc/Connecting_and_queries.rst index 446f43b31a..5d1624da4e 100644 --- a/docsrc/Connecting_and_queries.rst +++ b/docsrc/Connecting_and_queries.rst @@ -437,6 +437,59 @@ as values in the second argument. cursor.close() +Bulk insert for improved performance +-------------------------------------- + +For inserting large amounts of data more efficiently, you can use the ``bulk_insert`` parameter +with ``executemany()``. This concatenates multiple INSERT statements into a single batch request, +which can significantly improve performance when inserting many rows. + +**Note:** The ``bulk_insert`` parameter only works with INSERT statements. Using it with other +statement types (SELECT, UPDATE, DELETE, etc.) will raise an error. + +Example with QMARK parameter style:: + + import firebolt.db + # Explicitly set paramstyle to "qmark" for QMARK style + firebolt.db.paramstyle = "qmark" + + cursor.executemany( + "INSERT INTO test_table VALUES (?, ?, ?)", + ( + (1, "apple", "2019-01-01"), + (2, "banana", "2020-01-01"), + (3, "carrot", "2021-01-01"), + (4, "donut", "2022-01-01"), + (5, "eggplant", "2023-01-01") + ), + bulk_insert=True # Enable bulk insert for better performance (important-comment) + ) + +Example with FB_NUMERIC parameter style:: + + import firebolt.db + # Set paramstyle to "fb_numeric" for server-side parameter substitution + firebolt.db.paramstyle = "fb_numeric" + + cursor.executemany( + "INSERT INTO test_table VALUES ($1, $2, $3)", + ( + (1, "apple", "2019-01-01"), + (2, "banana", "2020-01-01"), + (3, "carrot", "2021-01-01"), + (4, "donut", "2022-01-01"), + (5, "eggplant", "2023-01-01") + ), + bulk_insert=True # Enable bulk insert for better performance (important-comment) + ) + + cursor.close() + +When ``bulk_insert=True``, the SDK concatenates all INSERT statements into a single batch +and sends them to the server with the ``merge_prepared_statement_batches=true`` parameter, +allowing for optimized batch processing. + + Setting session parameters -------------------------------------- diff --git a/src/firebolt/async_db/cursor.py b/src/firebolt/async_db/cursor.py index a03b1eecca..991ac4bdfb 100644 --- a/src/firebolt/async_db/cursor.py +++ b/src/firebolt/async_db/cursor.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import logging import time import warnings @@ -16,6 +17,8 @@ TimeoutException, codes, ) +from sqlparse import parse as parse_sql # type: ignore +from sqlparse.tokens import Token as TokenType # type: ignore from firebolt.client.client import AsyncClient, AsyncClientV1, AsyncClientV2 from firebolt.common._types import ColType, ParameterType, SetParameter @@ -385,6 +388,7 @@ async def executemany( query: str, parameters_seq: Sequence[Sequence[ParameterType]], timeout_seconds: Optional[float] = None, + bulk_insert: bool = False, ) -> Union[int, str]: """Prepare and execute a database query. @@ -402,6 +406,10 @@ async def executemany( `SET param=value` statement before it. All parameters are stored in cursor object until it's closed. They can also be removed with `flush_parameters` method call. + Bulk insert: When bulk_insert=True, multiple INSERT queries are + concatenated and sent as a single batch with + merge_prepared_statement_batches=true for improved performance. + Only supported for INSERT statements. Args: query (str): SQL query to execute. @@ -410,13 +418,132 @@ async def executemany( query with actual values from each set in a sequence. Resulting queries for each subset are executed sequentially. timeout_seconds (Optional[float]): Query execution timeout in seconds. + bulk_insert (bool): When True, concatenates multiple INSERT queries + into a single batch request. Only supported for INSERT statements. Returns: int: Query row count. """ + if bulk_insert: + return await self._executemany_bulk_insert( + query, parameters_seq, timeout_seconds + ) await self._do_execute(query, parameters_seq, timeout=timeout_seconds) return self.rowcount + def _validate_bulk_insert_query(self, query: str) -> None: + """Validate that query is an INSERT statement for bulk_insert.""" + statements = parse_sql(query) + if not statements: + raise ProgrammingError("bulk_insert requires a valid INSERT statement") + + if len(statements) > 1: + raise ProgrammingError( + "bulk_insert does not support multi-statement queries" + ) + + first_token = None + for token in statements[0].tokens: + if not token.is_whitespace: + first_token = token + break + + if ( + not first_token + or first_token.ttype != TokenType.Keyword + or first_token.value.upper() != "INSERT" + ): + token_val = first_token.value if first_token else "unknown" + raise ProgrammingError( + f"bulk_insert is only supported for INSERT statements. " + f"Got query starting with: {token_val}" + ) + + async def _executemany_bulk_insert( + self, + query: str, + parameters_seq: Sequence[Sequence[ParameterType]], + timeout_seconds: Optional[float], + ) -> int: + """Execute multiple INSERT queries as a single batch.""" + self._validate_bulk_insert_query(query) + + if not parameters_seq: + raise ProgrammingError("bulk_insert requires at least one parameter set") + + from firebolt.async_db import paramstyle + + try: + parameter_style = ParameterStyle(paramstyle) + except ValueError: + raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") + + await self._close_rowset_and_reset() + self._row_set = InMemoryAsyncRowSet() + + try: + if parameter_style == ParameterStyle.FB_NUMERIC: + concatenated_query = "; ".join([query] * len(parameters_seq)) + + flattened_params: List[ParameterType] = [] + for param_set in parameters_seq: + flattened_params.extend(param_set) + + query_parameters = [ + { + "name": f"${i+1}", + "value": self._formatter.convert_parameter_for_serialization( + value + ), + } + for i, value in enumerate(flattened_params) + ] + + query_params: Dict[str, Any] = { + "output_format": self._get_output_format(False), + "merge_prepared_statement_batches": "true", + } + if query_parameters: + query_params["query_parameters"] = json.dumps(query_parameters) + + Cursor._log_query(concatenated_query) + resp = await self._api_request( + concatenated_query, query_params, timeout=timeout_seconds + ) + await self._raise_if_error(resp) + await self._parse_response_headers(resp.headers) + await self._append_row_set_from_response(resp) + else: + formatted_queries = [] + statements = parse_sql(query) + for param_set in parameters_seq: + formatted_query = self._formatter.format_statement( + statements[0], param_set + ) + formatted_queries.append(formatted_query) + + concatenated_query = "; ".join(formatted_queries) + + query_params = { + "output_format": self._get_output_format(False), + "merge_prepared_statement_batches": "true", + } + + Cursor._log_query(concatenated_query) + resp = await self._api_request( + concatenated_query, query_params, timeout=timeout_seconds + ) + await self._raise_if_error(resp) + await self._parse_response_headers(resp.headers) + await self._append_row_set_from_response(resp) + + self._state = CursorState.DONE + except Exception: + self._state = CursorState.ERROR + raise + + return self.rowcount + @check_not_closed async def execute_stream( self, diff --git a/src/firebolt/db/cursor.py b/src/firebolt/db/cursor.py index 0d6deab171..65625482e8 100644 --- a/src/firebolt/db/cursor.py +++ b/src/firebolt/db/cursor.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import logging import time from abc import ABCMeta, abstractmethod @@ -24,6 +25,8 @@ TimeoutException, codes, ) +from sqlparse import parse as parse_sql # type: ignore +from sqlparse.tokens import Token as TokenType # type: ignore from firebolt.client import Client, ClientV1, ClientV2 from firebolt.common._types import ColType, ParameterType, SetParameter @@ -387,6 +390,7 @@ def executemany( query: str, parameters_seq: Sequence[Sequence[ParameterType]], timeout_seconds: Optional[float] = None, + bulk_insert: bool = False, ) -> Union[int, str]: """Prepare and execute a database query. @@ -404,6 +408,10 @@ def executemany( `SET param=value` statement before it. All parameters are stored in cursor object until it's closed. They can also be removed with `flush_parameters` method call. + Bulk insert: When bulk_insert=True, multiple INSERT queries are + concatenated and sent as a single batch with + merge_prepared_statement_batches=true for improved performance. + Only supported for INSERT statements. Args: query (str): SQL query to execute. @@ -412,13 +420,130 @@ def executemany( query with actual values from each set in a sequence. Resulting queries for each subset are executed sequentially. timeout_seconds (Optional[float]): Query execution timeout in seconds. + bulk_insert (bool): When True, concatenates multiple INSERT queries + into a single batch request. Only supported for INSERT statements. Returns: int: Query row count. """ + if bulk_insert: + return self._executemany_bulk_insert(query, parameters_seq, timeout_seconds) self._do_execute(query, parameters_seq, timeout=timeout_seconds) return self.rowcount + def _validate_bulk_insert_query(self, query: str) -> None: + """Validate that query is an INSERT statement for bulk_insert.""" + statements = parse_sql(query) + if not statements: + raise ProgrammingError("bulk_insert requires a valid INSERT statement") + + if len(statements) > 1: + raise ProgrammingError( + "bulk_insert does not support multi-statement queries" + ) + + first_token = None + for token in statements[0].tokens: + if not token.is_whitespace: + first_token = token + break + + if ( + not first_token + or first_token.ttype != TokenType.Keyword + or first_token.value.upper() != "INSERT" + ): + token_val = first_token.value if first_token else "unknown" + raise ProgrammingError( + f"bulk_insert is only supported for INSERT statements. " + f"Got query starting with: {token_val}" + ) + + def _executemany_bulk_insert( + self, + query: str, + parameters_seq: Sequence[Sequence[ParameterType]], + timeout_seconds: Optional[float], + ) -> int: + """Execute multiple INSERT queries as a single batch.""" + self._validate_bulk_insert_query(query) + + if not parameters_seq: + raise ProgrammingError("bulk_insert requires at least one parameter set") + + from firebolt.db import paramstyle + + try: + parameter_style = ParameterStyle(paramstyle) + except ValueError: + raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") + + self._close_rowset_and_reset() + self._row_set = InMemoryRowSet() + + try: + if parameter_style == ParameterStyle.FB_NUMERIC: + concatenated_query = "; ".join([query] * len(parameters_seq)) + + flattened_params: List[ParameterType] = [] + for param_set in parameters_seq: + flattened_params.extend(param_set) + + query_parameters = [ + { + "name": f"${i+1}", + "value": self._formatter.convert_parameter_for_serialization( + value + ), + } + for i, value in enumerate(flattened_params) + ] + + query_params: Dict[str, Any] = { + "output_format": self._get_output_format(False), + "merge_prepared_statement_batches": "true", + } + if query_parameters: + query_params["query_parameters"] = json.dumps(query_parameters) + + Cursor._log_query(concatenated_query) + resp = self._api_request( + concatenated_query, query_params, timeout=timeout_seconds + ) + self._raise_if_error(resp) + self._parse_response_headers(resp.headers) + self._append_row_set_from_response(resp) + else: + formatted_queries = [] + statements = parse_sql(query) + for param_set in parameters_seq: + formatted_query = self._formatter.format_statement( + statements[0], param_set + ) + formatted_queries.append(formatted_query) + + concatenated_query = "; ".join(formatted_queries) + + query_params = { + "output_format": self._get_output_format(False), + "merge_prepared_statement_batches": "true", + } + + Cursor._log_query(concatenated_query) + resp = self._api_request( + concatenated_query, query_params, timeout=timeout_seconds + ) + self._raise_if_error(resp) + self._parse_response_headers(resp.headers) + self._append_row_set_from_response(resp) + + self._state = CursorState.DONE + except Exception: + self._state = CursorState.ERROR + raise + + return self.rowcount + @check_not_closed def execute_stream( self, diff --git a/tests/integration/dbapi/async/V2/test_queries_async.py b/tests/integration/dbapi/async/V2/test_queries_async.py index e2042e4183..17332d1dd9 100644 --- a/tests/integration/dbapi/async/V2/test_queries_async.py +++ b/tests/integration/dbapi/async/V2/test_queries_async.py @@ -282,6 +282,58 @@ async def test_parameterized_query_with_special_chars(connection: Connection) -> ], "Invalid data in table after parameterized insert" +async def test_executemany_bulk_insert(connection: Connection) -> None: + """executemany with bulk_insert=True inserts data correctly.""" + async with connection.cursor() as c: + await c.execute('DROP TABLE IF EXISTS "test_bulk_insert_async"') + await c.execute( + 'CREATE FACT TABLE "test_bulk_insert_async"(id int, name string) primary index id' + ) + + import firebolt.async_db as db_module + + original_paramstyle = db_module.paramstyle + db_module.paramstyle = "qmark" + + try: + await c.executemany( + 'INSERT INTO "test_bulk_insert_async" VALUES (?, ?)', + [(1, "alice"), (2, "bob"), (3, "charlie")], + bulk_insert=True, + ) + + await c.execute('SELECT * FROM "test_bulk_insert_async" ORDER BY id') + data = await c.fetchall() + assert len(data) == 3 + assert data[0] == [1, "alice"] + assert data[1] == [2, "bob"] + assert data[2] == [3, "charlie"] + finally: + db_module.paramstyle = original_paramstyle + + await c.execute('DELETE FROM "test_bulk_insert_async"') + + db_module.paramstyle = "fb_numeric" + + try: + await c.executemany( + 'INSERT INTO "test_bulk_insert_async" VALUES ($1, $2)', + [(4, "david"), (5, "eve"), (6, "frank")], + bulk_insert=True, + ) + + await c.execute('SELECT * FROM "test_bulk_insert_async" ORDER BY id') + data = await c.fetchall() + assert len(data) == 3 + assert data[0] == [4, "david"] + assert data[1] == [5, "eve"] + assert data[2] == [6, "frank"] + finally: + db_module.paramstyle = original_paramstyle + + await c.execute('DROP TABLE "test_bulk_insert_async"') + + async def test_multi_statement_query(connection: Connection) -> None: """Query parameters are handled properly""" diff --git a/tests/integration/dbapi/sync/V2/test_queries.py b/tests/integration/dbapi/sync/V2/test_queries.py index 9b415cef88..3e10c37516 100644 --- a/tests/integration/dbapi/sync/V2/test_queries.py +++ b/tests/integration/dbapi/sync/V2/test_queries.py @@ -283,6 +283,58 @@ def test_empty_query(c: Cursor, query: str, params: tuple) -> None: ) +def test_executemany_bulk_insert(connection: Connection) -> None: + """executemany with bulk_insert=True inserts data correctly.""" + with connection.cursor() as c: + c.execute('DROP TABLE IF EXISTS "test_bulk_insert"') + c.execute( + 'CREATE FACT TABLE "test_bulk_insert"(id int, name string) primary index id' + ) + + import firebolt.db as db_module + + original_paramstyle = db_module.paramstyle + db_module.paramstyle = "qmark" + + try: + c.executemany( + 'INSERT INTO "test_bulk_insert" VALUES (?, ?)', + [(1, "alice"), (2, "bob"), (3, "charlie")], + bulk_insert=True, + ) + + c.execute('SELECT * FROM "test_bulk_insert" ORDER BY id') + data = c.fetchall() + assert len(data) == 3 + assert data[0] == [1, "alice"] + assert data[1] == [2, "bob"] + assert data[2] == [3, "charlie"] + finally: + db_module.paramstyle = original_paramstyle + + c.execute('DELETE FROM "test_bulk_insert"') + + db_module.paramstyle = "fb_numeric" + + try: + c.executemany( + 'INSERT INTO "test_bulk_insert" VALUES ($1, $2)', + [(4, "david"), (5, "eve"), (6, "frank")], + bulk_insert=True, + ) + + c.execute('SELECT * FROM "test_bulk_insert" ORDER BY id') + data = c.fetchall() + assert len(data) == 3 + assert data[0] == [4, "david"] + assert data[1] == [5, "eve"] + assert data[2] == [6, "frank"] + finally: + db_module.paramstyle = original_paramstyle + + c.execute('DROP TABLE "test_bulk_insert"') + + def test_multi_statement_query(connection: Connection) -> None: """Query parameters are handled properly""" diff --git a/tests/unit/async_db/test_cursor.py b/tests/unit/async_db/test_cursor.py index d26345d163..122a5055e6 100644 --- a/tests/unit/async_db/test_cursor.py +++ b/tests/unit/async_db/test_cursor.py @@ -1,3 +1,4 @@ +import json import re import time from datetime import date, datetime @@ -1505,3 +1506,133 @@ async def test_unsupported_paramstyle_raises(cursor: Cursor) -> None: await cursor.execute("SELECT 1") finally: db.paramstyle = original_paramstyle + + +async def test_executemany_bulk_insert_qmark( + httpx_mock: HTTPXMock, + cursor: Cursor, + query_url: str, + python_query_data: List[List[ColType]], +): + """executemany with bulk_insert=True concatenates INSERT queries with QMARK style.""" + + def bulk_insert_callback(request): + assert request.url.params.get("merge_prepared_statement_batches") == "true" + + query = request.content.decode() + assert query.count("INSERT INTO") == 3 + assert "; " in query + + return Response( + status_code=200, + content=json.dumps( + { + "data": [], + "rows": 0, + "statistics": {}, + } + ), + headers={}, + ) + + httpx_mock.add_callback(bulk_insert_callback, url=query_url) + + result = await cursor.executemany( + "INSERT INTO test_table VALUES (?, ?)", + [(1, "a"), (2, "b"), (3, "c")], + bulk_insert=True, + ) + assert result == 0 + + +async def test_executemany_bulk_insert_fb_numeric( + httpx_mock: HTTPXMock, + cursor: Cursor, + query_url: str, +): + """executemany with bulk_insert=True and FB_NUMERIC style.""" + import firebolt.async_db as db_module + + original_paramstyle = db_module.paramstyle + + try: + db_module.paramstyle = "fb_numeric" + + def bulk_insert_callback(request): + assert request.url.params.get("merge_prepared_statement_batches") == "true" + + query = request.content.decode() + assert query.count("INSERT INTO") == 3 + assert "; " in query + + query_params = json.loads(request.url.params.get("query_parameters", "[]")) + assert len(query_params) == 6 + assert query_params[0]["name"] == "$1" + assert query_params[5]["name"] == "$6" + + return Response( + status_code=200, + content=json.dumps( + { + "data": [], + "rows": 0, + "statistics": {}, + } + ), + headers={}, + ) + + httpx_mock.add_callback(bulk_insert_callback, url=query_url) + + result = await cursor.executemany( + "INSERT INTO test_table VALUES ($1, $2)", + [(1, "a"), (2, "b"), (3, "c")], + bulk_insert=True, + ) + assert result == 0 + finally: + db_module.paramstyle = original_paramstyle + + +async def test_executemany_bulk_insert_non_insert_fails(cursor: Cursor): + """executemany with bulk_insert=True fails for non-INSERT queries.""" + with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + await cursor.executemany( + "SELECT * FROM test_table", + [()], + bulk_insert=True, + ) + + with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + await cursor.executemany( + "UPDATE test_table SET col = ?", + [(1,)], + bulk_insert=True, + ) + + with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + await cursor.executemany( + "DELETE FROM test_table WHERE id = ?", + [(1,)], + bulk_insert=True, + ) + + +async def test_executemany_bulk_insert_multi_statement_fails(cursor: Cursor): + """executemany with bulk_insert=True fails for multi-statement queries.""" + with raises(ProgrammingError, match="does not support multi-statement"): + await cursor.executemany( + "INSERT INTO t1 VALUES (?); INSERT INTO t2 VALUES (?)", + [(1,), (2,)], + bulk_insert=True, + ) + + +async def test_executemany_bulk_insert_empty_params_fails(cursor: Cursor): + """executemany with bulk_insert=True fails with empty parameters.""" + with raises(ProgrammingError, match="requires at least one parameter set"): + await cursor.executemany( + "INSERT INTO test_table VALUES (?)", + [], + bulk_insert=True, + ) diff --git a/tests/unit/db/test_cursor.py b/tests/unit/db/test_cursor.py index e6325b6a5e..bdf7c2bf2e 100644 --- a/tests/unit/db/test_cursor.py +++ b/tests/unit/db/test_cursor.py @@ -1391,3 +1391,133 @@ def test_unsupported_paramstyle_raises(cursor): cursor.execute("SELECT 1") finally: db.paramstyle = original_paramstyle + + +def test_executemany_bulk_insert_qmark( + httpx_mock: HTTPXMock, + cursor: Cursor, + query_url: str, + python_query_data: List[List[ColType]], +): + """executemany with bulk_insert=True concatenates INSERT queries with QMARK style.""" + + def bulk_insert_callback(request): + assert request.url.params.get("merge_prepared_statement_batches") == "true" + + query = request.content.decode() + assert query.count("INSERT INTO") == 3 + assert "; " in query + + return Response( + status_code=200, + content=json.dumps( + { + "data": [], + "rows": 0, + "statistics": {}, + } + ), + headers={}, + ) + + httpx_mock.add_callback(bulk_insert_callback, url=query_url) + + result = cursor.executemany( + "INSERT INTO test_table VALUES (?, ?)", + [(1, "a"), (2, "b"), (3, "c")], + bulk_insert=True, + ) + assert result == 0 + + +def test_executemany_bulk_insert_fb_numeric( + httpx_mock: HTTPXMock, + cursor: Cursor, + query_url: str, +): + """executemany with bulk_insert=True and FB_NUMERIC style.""" + import firebolt.db as db_module + + original_paramstyle = db_module.paramstyle + + try: + db_module.paramstyle = "fb_numeric" + + def bulk_insert_callback(request): + assert request.url.params.get("merge_prepared_statement_batches") == "true" + + query = request.content.decode() + assert query.count("INSERT INTO") == 3 + assert "; " in query + + query_params = json.loads(request.url.params.get("query_parameters", "[]")) + assert len(query_params) == 6 + assert query_params[0]["name"] == "$1" + assert query_params[5]["name"] == "$6" + + return Response( + status_code=200, + content=json.dumps( + { + "data": [], + "rows": 0, + "statistics": {}, + } + ), + headers={}, + ) + + httpx_mock.add_callback(bulk_insert_callback, url=query_url) + + result = cursor.executemany( + "INSERT INTO test_table VALUES ($1, $2)", + [(1, "a"), (2, "b"), (3, "c")], + bulk_insert=True, + ) + assert result == 0 + finally: + db_module.paramstyle = original_paramstyle + + +def test_executemany_bulk_insert_non_insert_fails(cursor: Cursor): + """executemany with bulk_insert=True fails for non-INSERT queries.""" + with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + cursor.executemany( + "SELECT * FROM test_table", + [()], + bulk_insert=True, + ) + + with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + cursor.executemany( + "UPDATE test_table SET col = ?", + [(1,)], + bulk_insert=True, + ) + + with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + cursor.executemany( + "DELETE FROM test_table WHERE id = ?", + [(1,)], + bulk_insert=True, + ) + + +def test_executemany_bulk_insert_multi_statement_fails(cursor: Cursor): + """executemany with bulk_insert=True fails for multi-statement queries.""" + with raises(ProgrammingError, match="does not support multi-statement"): + cursor.executemany( + "INSERT INTO t1 VALUES (?); INSERT INTO t2 VALUES (?)", + [(1,), (2,)], + bulk_insert=True, + ) + + +def test_executemany_bulk_insert_empty_params_fails(cursor: Cursor): + """executemany with bulk_insert=True fails with empty parameters.""" + with raises(ProgrammingError, match="requires at least one parameter set"): + cursor.executemany( + "INSERT INTO test_table VALUES (?)", + [], + bulk_insert=True, + ) From 1a098d0fbaaf4a0733b63fd1624aed4e40284c56 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 10:03:53 +0000 Subject: [PATCH 07/21] fix: use sqlparse get_type() for reliable INSERT statement validation Co-Authored-By: petro.tiurin@firebolt.io --- src/firebolt/async_db/cursor.py | 17 +++-------------- src/firebolt/db/cursor.py | 17 +++-------------- 2 files changed, 6 insertions(+), 28 deletions(-) diff --git a/src/firebolt/async_db/cursor.py b/src/firebolt/async_db/cursor.py index 991ac4bdfb..2f2d5ffaa9 100644 --- a/src/firebolt/async_db/cursor.py +++ b/src/firebolt/async_db/cursor.py @@ -18,7 +18,6 @@ codes, ) from sqlparse import parse as parse_sql # type: ignore -from sqlparse.tokens import Token as TokenType # type: ignore from firebolt.client.client import AsyncClient, AsyncClientV1, AsyncClientV2 from firebolt.common._types import ColType, ParameterType, SetParameter @@ -442,21 +441,11 @@ def _validate_bulk_insert_query(self, query: str) -> None: "bulk_insert does not support multi-statement queries" ) - first_token = None - for token in statements[0].tokens: - if not token.is_whitespace: - first_token = token - break - - if ( - not first_token - or first_token.ttype != TokenType.Keyword - or first_token.value.upper() != "INSERT" - ): - token_val = first_token.value if first_token else "unknown" + statement_type = statements[0].get_type() + if statement_type != "INSERT": raise ProgrammingError( f"bulk_insert is only supported for INSERT statements. " - f"Got query starting with: {token_val}" + f"Got {statement_type} statement" ) async def _executemany_bulk_insert( diff --git a/src/firebolt/db/cursor.py b/src/firebolt/db/cursor.py index 65625482e8..fb02743818 100644 --- a/src/firebolt/db/cursor.py +++ b/src/firebolt/db/cursor.py @@ -26,7 +26,6 @@ codes, ) from sqlparse import parse as parse_sql # type: ignore -from sqlparse.tokens import Token as TokenType # type: ignore from firebolt.client import Client, ClientV1, ClientV2 from firebolt.common._types import ColType, ParameterType, SetParameter @@ -442,21 +441,11 @@ def _validate_bulk_insert_query(self, query: str) -> None: "bulk_insert does not support multi-statement queries" ) - first_token = None - for token in statements[0].tokens: - if not token.is_whitespace: - first_token = token - break - - if ( - not first_token - or first_token.ttype != TokenType.Keyword - or first_token.value.upper() != "INSERT" - ): - token_val = first_token.value if first_token else "unknown" + statement_type = statements[0].get_type() + if statement_type != "INSERT": raise ProgrammingError( f"bulk_insert is only supported for INSERT statements. " - f"Got query starting with: {token_val}" + f"Got {statement_type} statement" ) def _executemany_bulk_insert( From 2b51b14fe8d30606fc3e6efbaef8808f77d96fad Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 10:13:25 +0000 Subject: [PATCH 08/21] fix: use regex URL matching in bulk_insert tests to handle query parameters Co-Authored-By: petro.tiurin@firebolt.io --- tests/unit/async_db/test_cursor.py | 8 ++++++-- tests/unit/db/test_cursor.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/unit/async_db/test_cursor.py b/tests/unit/async_db/test_cursor.py index 122a5055e6..d639bbabc6 100644 --- a/tests/unit/async_db/test_cursor.py +++ b/tests/unit/async_db/test_cursor.py @@ -1535,7 +1535,9 @@ def bulk_insert_callback(request): headers={}, ) - httpx_mock.add_callback(bulk_insert_callback, url=query_url) + base_url = query_url.split("?")[0] + url_pattern = re.compile(re.escape(base_url)) + httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) result = await cursor.executemany( "INSERT INTO test_table VALUES (?, ?)", @@ -1582,7 +1584,9 @@ def bulk_insert_callback(request): headers={}, ) - httpx_mock.add_callback(bulk_insert_callback, url=query_url) + base_url = query_url.split("?")[0] + url_pattern = re.compile(re.escape(base_url)) + httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) result = await cursor.executemany( "INSERT INTO test_table VALUES ($1, $2)", diff --git a/tests/unit/db/test_cursor.py b/tests/unit/db/test_cursor.py index bdf7c2bf2e..3c025e9d37 100644 --- a/tests/unit/db/test_cursor.py +++ b/tests/unit/db/test_cursor.py @@ -1420,7 +1420,9 @@ def bulk_insert_callback(request): headers={}, ) - httpx_mock.add_callback(bulk_insert_callback, url=query_url) + base_url = query_url.split("?")[0] + url_pattern = re.compile(re.escape(base_url)) + httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) result = cursor.executemany( "INSERT INTO test_table VALUES (?, ?)", @@ -1467,7 +1469,9 @@ def bulk_insert_callback(request): headers={}, ) - httpx_mock.add_callback(bulk_insert_callback, url=query_url) + base_url = query_url.split("?")[0] + url_pattern = re.compile(re.escape(base_url)) + httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) result = cursor.executemany( "INSERT INTO test_table VALUES ($1, $2)", From b00d87a495ff5ca1d57f0c02aa1d4b9da6b6d6ec Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 10:23:39 +0000 Subject: [PATCH 09/21] fix: convert URL objects to strings before split in bulk_insert tests Co-Authored-By: petro.tiurin@firebolt.io --- tests/unit/async_db/test_cursor.py | 4 ++-- tests/unit/db/test_cursor.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/async_db/test_cursor.py b/tests/unit/async_db/test_cursor.py index d639bbabc6..9cfa01a7c7 100644 --- a/tests/unit/async_db/test_cursor.py +++ b/tests/unit/async_db/test_cursor.py @@ -1535,7 +1535,7 @@ def bulk_insert_callback(request): headers={}, ) - base_url = query_url.split("?")[0] + base_url = str(query_url).split("?")[0] url_pattern = re.compile(re.escape(base_url)) httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) @@ -1584,7 +1584,7 @@ def bulk_insert_callback(request): headers={}, ) - base_url = query_url.split("?")[0] + base_url = str(query_url).split("?")[0] url_pattern = re.compile(re.escape(base_url)) httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) diff --git a/tests/unit/db/test_cursor.py b/tests/unit/db/test_cursor.py index 3c025e9d37..9580992047 100644 --- a/tests/unit/db/test_cursor.py +++ b/tests/unit/db/test_cursor.py @@ -1420,7 +1420,7 @@ def bulk_insert_callback(request): headers={}, ) - base_url = query_url.split("?")[0] + base_url = str(query_url).split("?")[0] url_pattern = re.compile(re.escape(base_url)) httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) @@ -1469,7 +1469,7 @@ def bulk_insert_callback(request): headers={}, ) - base_url = query_url.split("?")[0] + base_url = str(query_url).split("?")[0] url_pattern = re.compile(re.escape(base_url)) httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) From 9bbb19c02ad33d43bebed7a7bc71bad70c40f7a0 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 10:55:12 +0000 Subject: [PATCH 10/21] fix: add meta field to bulk_insert test mock responses Co-Authored-By: petro.tiurin@firebolt.io --- tests/unit/async_db/test_cursor.py | 2 ++ tests/unit/db/test_cursor.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/unit/async_db/test_cursor.py b/tests/unit/async_db/test_cursor.py index 9cfa01a7c7..ccebb52c7f 100644 --- a/tests/unit/async_db/test_cursor.py +++ b/tests/unit/async_db/test_cursor.py @@ -1527,6 +1527,7 @@ def bulk_insert_callback(request): status_code=200, content=json.dumps( { + "meta": [], "data": [], "rows": 0, "statistics": {}, @@ -1576,6 +1577,7 @@ def bulk_insert_callback(request): status_code=200, content=json.dumps( { + "meta": [], "data": [], "rows": 0, "statistics": {}, diff --git a/tests/unit/db/test_cursor.py b/tests/unit/db/test_cursor.py index 9580992047..6f485d45c3 100644 --- a/tests/unit/db/test_cursor.py +++ b/tests/unit/db/test_cursor.py @@ -1412,6 +1412,7 @@ def bulk_insert_callback(request): status_code=200, content=json.dumps( { + "meta": [], "data": [], "rows": 0, "statistics": {}, @@ -1461,6 +1462,7 @@ def bulk_insert_callback(request): status_code=200, content=json.dumps( { + "meta": [], "data": [], "rows": 0, "statistics": {}, From d5e4257e8582615c64f8b20ce023ec733d66f631 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 11:02:39 +0000 Subject: [PATCH 11/21] fix: add proper statistics structure to bulk_insert test mock responses Co-Authored-By: petro.tiurin@firebolt.io --- tests/unit/async_db/test_cursor.py | 12 ++++++++++-- tests/unit/db/test_cursor.py | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/unit/async_db/test_cursor.py b/tests/unit/async_db/test_cursor.py index ccebb52c7f..dfc3b7918f 100644 --- a/tests/unit/async_db/test_cursor.py +++ b/tests/unit/async_db/test_cursor.py @@ -1530,7 +1530,11 @@ def bulk_insert_callback(request): "meta": [], "data": [], "rows": 0, - "statistics": {}, + "statistics": { + "elapsed": 0.0, + "rows_read": 0, + "bytes_read": 0, + }, } ), headers={}, @@ -1580,7 +1584,11 @@ def bulk_insert_callback(request): "meta": [], "data": [], "rows": 0, - "statistics": {}, + "statistics": { + "elapsed": 0.0, + "rows_read": 0, + "bytes_read": 0, + }, } ), headers={}, diff --git a/tests/unit/db/test_cursor.py b/tests/unit/db/test_cursor.py index 6f485d45c3..079f3ae84a 100644 --- a/tests/unit/db/test_cursor.py +++ b/tests/unit/db/test_cursor.py @@ -1415,7 +1415,11 @@ def bulk_insert_callback(request): "meta": [], "data": [], "rows": 0, - "statistics": {}, + "statistics": { + "elapsed": 0.0, + "rows_read": 0, + "bytes_read": 0, + }, } ), headers={}, @@ -1465,7 +1469,11 @@ def bulk_insert_callback(request): "meta": [], "data": [], "rows": 0, - "statistics": {}, + "statistics": { + "elapsed": 0.0, + "rows_read": 0, + "bytes_read": 0, + }, } ), headers={}, From a3c981bfc51a31a24710f5aaf2c39f8894360cfb Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 10 Oct 2025 08:55:40 +0000 Subject: [PATCH 12/21] refactor: simplify bulk_insert validation and reuse existing execution logic - Replace sqlparse with simple string checks in _validate_bulk_insert_query - Add extra_params parameter to _build_fb_numeric_query_params for extensibility - Refactor _executemany_bulk_insert to preprocess and delegate to existing methods - Use TimeoutController for consistent timeout handling - Parametrize integration tests to avoid duplication Addresses PR feedback from ptiurin on #463 Co-Authored-By: petro.tiurin@firebolt.io --- src/firebolt/async_db/cursor.py | 50 ++++++--------- src/firebolt/common/cursor/base_cursor.py | 52 +++++++++++++-- src/firebolt/db/cursor.py | 50 ++++++--------- .../dbapi/async/V2/test_queries_async.py | 63 ++++++++----------- .../integration/dbapi/sync/V2/test_queries.py | 63 ++++++++----------- 5 files changed, 140 insertions(+), 138 deletions(-) diff --git a/src/firebolt/async_db/cursor.py b/src/firebolt/async_db/cursor.py index 2f2d5ffaa9..b19514d070 100644 --- a/src/firebolt/async_db/cursor.py +++ b/src/firebolt/async_db/cursor.py @@ -1,6 +1,5 @@ from __future__ import annotations -import json import logging import time import warnings @@ -432,20 +431,16 @@ async def executemany( def _validate_bulk_insert_query(self, query: str) -> None: """Validate that query is an INSERT statement for bulk_insert.""" - statements = parse_sql(query) - if not statements: - raise ProgrammingError("bulk_insert requires a valid INSERT statement") + query_normalized = query.lstrip().lower() - if len(statements) > 1: + if not query_normalized.startswith("insert"): raise ProgrammingError( - "bulk_insert does not support multi-statement queries" + "bulk_insert is only supported for INSERT statements" ) - statement_type = statements[0].get_type() - if statement_type != "INSERT": + if ";" in query.strip().rstrip(";"): raise ProgrammingError( - f"bulk_insert is only supported for INSERT statements. " - f"Got {statement_type} statement" + "bulk_insert does not support multi-statement queries" ) async def _executemany_bulk_insert( @@ -467,37 +462,32 @@ async def _executemany_bulk_insert( except ValueError: raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") + concatenated_query = "; ".join([query] * len(parameters_seq)) + await self._close_rowset_and_reset() self._row_set = InMemoryAsyncRowSet() try: if parameter_style == ParameterStyle.FB_NUMERIC: - concatenated_query = "; ".join([query] * len(parameters_seq)) - flattened_params: List[ParameterType] = [] for param_set in parameters_seq: flattened_params.extend(param_set) - query_parameters = [ - { - "name": f"${i+1}", - "value": self._formatter.convert_parameter_for_serialization( - value - ), - } - for i, value in enumerate(flattened_params) - ] - - query_params: Dict[str, Any] = { - "output_format": self._get_output_format(False), - "merge_prepared_statement_batches": "true", - } - if query_parameters: - query_params["query_parameters"] = json.dumps(query_parameters) - Cursor._log_query(concatenated_query) + timeout_controller = TimeoutController(timeout_seconds) + timeout_controller.raise_if_timeout() + + query_params = self._build_fb_numeric_query_params( + [flattened_params], + streaming=False, + async_execution=False, + extra_params={"merge_prepared_statement_batches": "true"}, + ) + resp = await self._api_request( - concatenated_query, query_params, timeout=timeout_seconds + concatenated_query, + query_params, + timeout=timeout_controller.remaining(), ) await self._raise_if_error(resp) await self._parse_response_headers(resp.headers) diff --git a/src/firebolt/common/cursor/base_cursor.py b/src/firebolt/common/cursor/base_cursor.py index 8493788642..0b7b1f6fae 100644 --- a/src/firebolt/common/cursor/base_cursor.py +++ b/src/firebolt/common/cursor/base_cursor.py @@ -3,13 +3,13 @@ import logging import re from types import TracebackType -from typing import Any, Dict, List, Optional, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union from httpx import URL, Response from firebolt.client.auth.base import Auth from firebolt.client.client import AsyncClient, Client -from firebolt.common._types import RawColType, SetParameter +from firebolt.common._types import ParameterType, RawColType, SetParameter from firebolt.common.constants import ( DISALLOWED_PARAMETER_LIST, IMMUTABLE_PARAMETER_LIST, @@ -27,7 +27,11 @@ SecureCacheKey, _firebolt_cache, ) -from firebolt.utils.exception import ConfigurationError, FireboltError +from firebolt.utils.exception import ( + ConfigurationError, + FireboltError, + ProgrammingError, +) from firebolt.utils.util import fix_url_schema logger = logging.getLogger(__name__) @@ -236,7 +240,6 @@ def _log_query(query: Union[str, SetParameter]) -> None: "aws_key_id|credentials", query, flags=re.IGNORECASE ): logger.debug(f"Running query: {query}") - @property def engine_name(self) -> str: """ @@ -317,3 +320,44 @@ def set_cache_record(self, record: ConnectionInfo) -> None: self._client.auth.secret, ) _firebolt_cache.set(cache_key, record) + + def _validate_bulk_insert_query(self, query: str) -> None: + """Validate that query is an INSERT statement for bulk_insert.""" + query_normalized = query.lstrip().lower() + + if not query_normalized.startswith("insert"): + raise ConfigurationError( + "bulk_insert is only supported for INSERT statements" + ) + + if ";" in query.strip().rstrip(";"): + raise ProgrammingError( + "bulk_insert does not support multi-statement queries" + ) + + def _prepare_bulk_insert( + self, query: str, parameters_seq: Sequence[Sequence[ParameterType]] + ) -> tuple[str, Sequence[Sequence[ParameterType]]]: + """Execute multiple INSERT queries as a single batch.""" + self._validate_bulk_insert_query(query) + + if not parameters_seq: + raise ProgrammingError("bulk_insert requires at least one parameter set") + + # For bulk insert, we need to create unique parameter names for each INSERT + # Example: ($1, $2); ($3, $4); ($5, $6) instead of ($1, $2); ($1, $2); ($1, $2) + queries = [] + param_offset = 0 + for param_set in parameters_seq: + # Replace parameter placeholders with unique numbers + modified_query = query + for i in range(len(param_set)): + old_param = f"${i + 1}" + new_param = f"${param_offset + i + 1}" + modified_query = modified_query.replace(old_param, new_param) + queries.append(modified_query) + param_offset += len(param_set) + + combined_query = "; ".join(queries) + parameters = [param for param_set in parameters_seq for param in param_set] + return combined_query, [parameters] diff --git a/src/firebolt/db/cursor.py b/src/firebolt/db/cursor.py index fb02743818..f4ff48376b 100644 --- a/src/firebolt/db/cursor.py +++ b/src/firebolt/db/cursor.py @@ -1,6 +1,5 @@ from __future__ import annotations -import json import logging import time from abc import ABCMeta, abstractmethod @@ -432,20 +431,16 @@ def executemany( def _validate_bulk_insert_query(self, query: str) -> None: """Validate that query is an INSERT statement for bulk_insert.""" - statements = parse_sql(query) - if not statements: - raise ProgrammingError("bulk_insert requires a valid INSERT statement") + query_normalized = query.lstrip().lower() - if len(statements) > 1: + if not query_normalized.startswith("insert"): raise ProgrammingError( - "bulk_insert does not support multi-statement queries" + "bulk_insert is only supported for INSERT statements" ) - statement_type = statements[0].get_type() - if statement_type != "INSERT": + if ";" in query.strip().rstrip(";"): raise ProgrammingError( - f"bulk_insert is only supported for INSERT statements. " - f"Got {statement_type} statement" + "bulk_insert does not support multi-statement queries" ) def _executemany_bulk_insert( @@ -467,37 +462,32 @@ def _executemany_bulk_insert( except ValueError: raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") + concatenated_query = "; ".join([query] * len(parameters_seq)) + self._close_rowset_and_reset() self._row_set = InMemoryRowSet() try: if parameter_style == ParameterStyle.FB_NUMERIC: - concatenated_query = "; ".join([query] * len(parameters_seq)) - flattened_params: List[ParameterType] = [] for param_set in parameters_seq: flattened_params.extend(param_set) - query_parameters = [ - { - "name": f"${i+1}", - "value": self._formatter.convert_parameter_for_serialization( - value - ), - } - for i, value in enumerate(flattened_params) - ] - - query_params: Dict[str, Any] = { - "output_format": self._get_output_format(False), - "merge_prepared_statement_batches": "true", - } - if query_parameters: - query_params["query_parameters"] = json.dumps(query_parameters) - Cursor._log_query(concatenated_query) + timeout_controller = TimeoutController(timeout_seconds) + timeout_controller.raise_if_timeout() + + query_params = self._build_fb_numeric_query_params( + [flattened_params], + streaming=False, + async_execution=False, + extra_params={"merge_prepared_statement_batches": "true"}, + ) + resp = self._api_request( - concatenated_query, query_params, timeout=timeout_seconds + concatenated_query, + query_params, + timeout=timeout_controller.remaining(), ) self._raise_if_error(resp) self._parse_response_headers(resp.headers) diff --git a/tests/integration/dbapi/async/V2/test_queries_async.py b/tests/integration/dbapi/async/V2/test_queries_async.py index 17332d1dd9..9b768caaf6 100644 --- a/tests/integration/dbapi/async/V2/test_queries_async.py +++ b/tests/integration/dbapi/async/V2/test_queries_async.py @@ -282,56 +282,45 @@ async def test_parameterized_query_with_special_chars(connection: Connection) -> ], "Invalid data in table after parameterized insert" -async def test_executemany_bulk_insert(connection: Connection) -> None: +@mark.parametrize("paramstyle", ["qmark", "fb_numeric"]) +async def test_executemany_bulk_insert(connection: Connection, paramstyle: str) -> None: """executemany with bulk_insert=True inserts data correctly.""" - async with connection.cursor() as c: - await c.execute('DROP TABLE IF EXISTS "test_bulk_insert_async"') - await c.execute( - 'CREATE FACT TABLE "test_bulk_insert_async"(id int, name string) primary index id' - ) + import firebolt.async_db as db_module - import firebolt.async_db as db_module + original_paramstyle = db_module.paramstyle - original_paramstyle = db_module.paramstyle - db_module.paramstyle = "qmark" + try: + db_module.paramstyle = paramstyle - try: - await c.executemany( - 'INSERT INTO "test_bulk_insert_async" VALUES (?, ?)', - [(1, "alice"), (2, "bob"), (3, "charlie")], - bulk_insert=True, + async with connection.cursor() as c: + await c.execute('DROP TABLE IF EXISTS "test_bulk_insert_async"') + await c.execute( + 'CREATE FACT TABLE "test_bulk_insert_async"(id int, name string) primary index id' ) + if paramstyle == "qmark": + await c.executemany( + 'INSERT INTO "test_bulk_insert_async" VALUES (?, ?)', + [(1, "alice"), (2, "bob"), (3, "charlie")], + bulk_insert=True, + ) + else: + await c.executemany( + 'INSERT INTO "test_bulk_insert_async" VALUES ($1, $2)', + [(1, "alice"), (2, "bob"), (3, "charlie")], + bulk_insert=True, + ) + await c.execute('SELECT * FROM "test_bulk_insert_async" ORDER BY id') data = await c.fetchall() assert len(data) == 3 assert data[0] == [1, "alice"] assert data[1] == [2, "bob"] assert data[2] == [3, "charlie"] - finally: - db_module.paramstyle = original_paramstyle - - await c.execute('DELETE FROM "test_bulk_insert_async"') - - db_module.paramstyle = "fb_numeric" - - try: - await c.executemany( - 'INSERT INTO "test_bulk_insert_async" VALUES ($1, $2)', - [(4, "david"), (5, "eve"), (6, "frank")], - bulk_insert=True, - ) - - await c.execute('SELECT * FROM "test_bulk_insert_async" ORDER BY id') - data = await c.fetchall() - assert len(data) == 3 - assert data[0] == [4, "david"] - assert data[1] == [5, "eve"] - assert data[2] == [6, "frank"] - finally: - db_module.paramstyle = original_paramstyle - await c.execute('DROP TABLE "test_bulk_insert_async"') + await c.execute('DROP TABLE "test_bulk_insert_async"') + finally: + db_module.paramstyle = original_paramstyle async def test_multi_statement_query(connection: Connection) -> None: diff --git a/tests/integration/dbapi/sync/V2/test_queries.py b/tests/integration/dbapi/sync/V2/test_queries.py index 3e10c37516..d7e6fdd183 100644 --- a/tests/integration/dbapi/sync/V2/test_queries.py +++ b/tests/integration/dbapi/sync/V2/test_queries.py @@ -283,56 +283,45 @@ def test_empty_query(c: Cursor, query: str, params: tuple) -> None: ) -def test_executemany_bulk_insert(connection: Connection) -> None: +@mark.parametrize("paramstyle", ["qmark", "fb_numeric"]) +def test_executemany_bulk_insert(connection: Connection, paramstyle: str) -> None: """executemany with bulk_insert=True inserts data correctly.""" - with connection.cursor() as c: - c.execute('DROP TABLE IF EXISTS "test_bulk_insert"') - c.execute( - 'CREATE FACT TABLE "test_bulk_insert"(id int, name string) primary index id' - ) + import firebolt.db as db_module - import firebolt.db as db_module + original_paramstyle = db_module.paramstyle - original_paramstyle = db_module.paramstyle - db_module.paramstyle = "qmark" + try: + db_module.paramstyle = paramstyle - try: - c.executemany( - 'INSERT INTO "test_bulk_insert" VALUES (?, ?)', - [(1, "alice"), (2, "bob"), (3, "charlie")], - bulk_insert=True, + with connection.cursor() as c: + c.execute('DROP TABLE IF EXISTS "test_bulk_insert"') + c.execute( + 'CREATE FACT TABLE "test_bulk_insert"(id int, name string) primary index id' ) + if paramstyle == "qmark": + c.executemany( + 'INSERT INTO "test_bulk_insert" VALUES (?, ?)', + [(1, "alice"), (2, "bob"), (3, "charlie")], + bulk_insert=True, + ) + else: + c.executemany( + 'INSERT INTO "test_bulk_insert" VALUES ($1, $2)', + [(1, "alice"), (2, "bob"), (3, "charlie")], + bulk_insert=True, + ) + c.execute('SELECT * FROM "test_bulk_insert" ORDER BY id') data = c.fetchall() assert len(data) == 3 assert data[0] == [1, "alice"] assert data[1] == [2, "bob"] assert data[2] == [3, "charlie"] - finally: - db_module.paramstyle = original_paramstyle - - c.execute('DELETE FROM "test_bulk_insert"') - - db_module.paramstyle = "fb_numeric" - - try: - c.executemany( - 'INSERT INTO "test_bulk_insert" VALUES ($1, $2)', - [(4, "david"), (5, "eve"), (6, "frank")], - bulk_insert=True, - ) - - c.execute('SELECT * FROM "test_bulk_insert" ORDER BY id') - data = c.fetchall() - assert len(data) == 3 - assert data[0] == [4, "david"] - assert data[1] == [5, "eve"] - assert data[2] == [6, "frank"] - finally: - db_module.paramstyle = original_paramstyle - c.execute('DROP TABLE "test_bulk_insert"') + c.execute('DROP TABLE "test_bulk_insert"') + finally: + db_module.paramstyle = original_paramstyle def test_multi_statement_query(connection: Connection) -> None: From eeb4702dc551e063db9c4638f99a3e0e3e975a43 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 10 Oct 2025 09:03:26 +0000 Subject: [PATCH 13/21] refactor: move imports to top of integration test files - Move firebolt.db and firebolt.async_db imports to top of test files - Remove local import statements from inside test functions - Use full module names instead of local aliases Addresses final PR feedback from ptiurin on #463 Co-Authored-By: petro.tiurin@firebolt.io --- tests/integration/dbapi/async/V2/test_queries_async.py | 9 ++++----- tests/integration/dbapi/sync/V2/test_queries.py | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/integration/dbapi/async/V2/test_queries_async.py b/tests/integration/dbapi/async/V2/test_queries_async.py index 9b768caaf6..383cdacc96 100644 --- a/tests/integration/dbapi/async/V2/test_queries_async.py +++ b/tests/integration/dbapi/async/V2/test_queries_async.py @@ -6,6 +6,7 @@ from pytest import mark, raises +import firebolt.async_db from firebolt.async_db import Binary, Connection, Cursor, OperationalError from firebolt.async_db.connection import connect from firebolt.client.auth.base import Auth @@ -285,12 +286,10 @@ async def test_parameterized_query_with_special_chars(connection: Connection) -> @mark.parametrize("paramstyle", ["qmark", "fb_numeric"]) async def test_executemany_bulk_insert(connection: Connection, paramstyle: str) -> None: """executemany with bulk_insert=True inserts data correctly.""" - import firebolt.async_db as db_module - - original_paramstyle = db_module.paramstyle + original_paramstyle = firebolt.async_db.paramstyle try: - db_module.paramstyle = paramstyle + firebolt.async_db.paramstyle = paramstyle async with connection.cursor() as c: await c.execute('DROP TABLE IF EXISTS "test_bulk_insert_async"') @@ -320,7 +319,7 @@ async def test_executemany_bulk_insert(connection: Connection, paramstyle: str) await c.execute('DROP TABLE "test_bulk_insert_async"') finally: - db_module.paramstyle = original_paramstyle + firebolt.async_db.paramstyle = original_paramstyle async def test_multi_statement_query(connection: Connection) -> None: diff --git a/tests/integration/dbapi/sync/V2/test_queries.py b/tests/integration/dbapi/sync/V2/test_queries.py index d7e6fdd183..c72ad59e05 100644 --- a/tests/integration/dbapi/sync/V2/test_queries.py +++ b/tests/integration/dbapi/sync/V2/test_queries.py @@ -7,6 +7,7 @@ from pytest import mark, raises +import firebolt.db from firebolt.client.auth import Auth from firebolt.common._types import ColType from firebolt.common.row_set.types import Column @@ -286,12 +287,10 @@ def test_empty_query(c: Cursor, query: str, params: tuple) -> None: @mark.parametrize("paramstyle", ["qmark", "fb_numeric"]) def test_executemany_bulk_insert(connection: Connection, paramstyle: str) -> None: """executemany with bulk_insert=True inserts data correctly.""" - import firebolt.db as db_module - - original_paramstyle = db_module.paramstyle + original_paramstyle = firebolt.db.paramstyle try: - db_module.paramstyle = paramstyle + firebolt.db.paramstyle = paramstyle with connection.cursor() as c: c.execute('DROP TABLE IF EXISTS "test_bulk_insert"') @@ -321,7 +320,7 @@ def test_executemany_bulk_insert(connection: Connection, paramstyle: str) -> Non c.execute('DROP TABLE "test_bulk_insert"') finally: - db_module.paramstyle = original_paramstyle + firebolt.db.paramstyle = original_paramstyle def test_multi_statement_query(connection: Connection) -> None: From 878ddd2d879639f28734e36b9b9fe510377cd272 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Mon, 13 Oct 2025 17:29:33 +0100 Subject: [PATCH 14/21] only support this in fb_numeric --- src/firebolt/async_db/cursor.py | 107 +----------------- .../common/cursor/statement_planners.py | 63 ++++++++++- src/firebolt/db/cursor.py | 105 +---------------- .../dbapi/async/V2/test_queries_async.py | 31 ++--- .../integration/dbapi/sync/V2/test_queries.py | 33 ++---- tests/unit/async_db/test_cursor.py | 89 ++++++--------- tests/unit/db/test_cursor.py | 93 +++++++-------- 7 files changed, 168 insertions(+), 353 deletions(-) diff --git a/src/firebolt/async_db/cursor.py b/src/firebolt/async_db/cursor.py index b19514d070..da1c108723 100644 --- a/src/firebolt/async_db/cursor.py +++ b/src/firebolt/async_db/cursor.py @@ -16,7 +16,6 @@ TimeoutException, codes, ) -from sqlparse import parse as parse_sql # type: ignore from firebolt.client.client import AsyncClient, AsyncClientV1, AsyncClientV2 from firebolt.common._types import ColType, ParameterType, SetParameter @@ -47,6 +46,7 @@ from firebolt.common.row_set.asynchronous.streaming import StreamingAsyncRowSet from firebolt.common.statement_formatter import create_statement_formatter from firebolt.utils.exception import ( + ConfigurationError, EngineNotRunningError, FireboltDatabaseError, FireboltError, @@ -219,6 +219,7 @@ async def _do_execute( timeout: Optional[float] = None, async_execution: bool = False, streaming: bool = False, + bulk_insert: bool = False, ) -> None: await self._close_rowset_and_reset() self._row_set = StreamingAsyncRowSet() if streaming else InMemoryAsyncRowSet() @@ -231,7 +232,7 @@ async def _do_execute( ) plan = statement_planner.create_execution_plan( - raw_query, parameters, skip_parsing, async_execution, streaming + raw_query, parameters, skip_parsing, async_execution, streaming, bulk_insert ) await self._execute_plan(plan, timeout) self._state = CursorState.DONE @@ -422,105 +423,9 @@ async def executemany( Returns: int: Query row count. """ - if bulk_insert: - return await self._executemany_bulk_insert( - query, parameters_seq, timeout_seconds - ) - await self._do_execute(query, parameters_seq, timeout=timeout_seconds) - return self.rowcount - - def _validate_bulk_insert_query(self, query: str) -> None: - """Validate that query is an INSERT statement for bulk_insert.""" - query_normalized = query.lstrip().lower() - - if not query_normalized.startswith("insert"): - raise ProgrammingError( - "bulk_insert is only supported for INSERT statements" - ) - - if ";" in query.strip().rstrip(";"): - raise ProgrammingError( - "bulk_insert does not support multi-statement queries" - ) - - async def _executemany_bulk_insert( - self, - query: str, - parameters_seq: Sequence[Sequence[ParameterType]], - timeout_seconds: Optional[float], - ) -> int: - """Execute multiple INSERT queries as a single batch.""" - self._validate_bulk_insert_query(query) - - if not parameters_seq: - raise ProgrammingError("bulk_insert requires at least one parameter set") - - from firebolt.async_db import paramstyle - - try: - parameter_style = ParameterStyle(paramstyle) - except ValueError: - raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") - - concatenated_query = "; ".join([query] * len(parameters_seq)) - - await self._close_rowset_and_reset() - self._row_set = InMemoryAsyncRowSet() - - try: - if parameter_style == ParameterStyle.FB_NUMERIC: - flattened_params: List[ParameterType] = [] - for param_set in parameters_seq: - flattened_params.extend(param_set) - - Cursor._log_query(concatenated_query) - timeout_controller = TimeoutController(timeout_seconds) - timeout_controller.raise_if_timeout() - - query_params = self._build_fb_numeric_query_params( - [flattened_params], - streaming=False, - async_execution=False, - extra_params={"merge_prepared_statement_batches": "true"}, - ) - - resp = await self._api_request( - concatenated_query, - query_params, - timeout=timeout_controller.remaining(), - ) - await self._raise_if_error(resp) - await self._parse_response_headers(resp.headers) - await self._append_row_set_from_response(resp) - else: - formatted_queries = [] - statements = parse_sql(query) - for param_set in parameters_seq: - formatted_query = self._formatter.format_statement( - statements[0], param_set - ) - formatted_queries.append(formatted_query) - - concatenated_query = "; ".join(formatted_queries) - - query_params = { - "output_format": self._get_output_format(False), - "merge_prepared_statement_batches": "true", - } - - Cursor._log_query(concatenated_query) - resp = await self._api_request( - concatenated_query, query_params, timeout=timeout_seconds - ) - await self._raise_if_error(resp) - await self._parse_response_headers(resp.headers) - await self._append_row_set_from_response(resp) - - self._state = CursorState.DONE - except Exception: - self._state = CursorState.ERROR - raise - + await self._do_execute( + query, parameters_seq, timeout=timeout_seconds, bulk_insert=bulk_insert + ) return self.rowcount @check_not_closed diff --git a/src/firebolt/common/cursor/statement_planners.py b/src/firebolt/common/cursor/statement_planners.py index a24d5f92b2..b7f44cddb6 100644 --- a/src/firebolt/common/cursor/statement_planners.py +++ b/src/firebolt/common/cursor/statement_planners.py @@ -12,7 +12,7 @@ JSON_LINES_OUTPUT_FORMAT, JSON_OUTPUT_FORMAT, ) -from firebolt.utils.exception import FireboltError, ProgrammingError +from firebolt.utils.exception import ConfigurationError, FireboltError, ProgrammingError if TYPE_CHECKING: from firebolt.common.statement_formatter import StatementFormatter @@ -44,6 +44,7 @@ def create_execution_plan( skip_parsing: bool = False, async_execution: bool = False, streaming: bool = False, + bulk_insert: bool = False, ) -> ExecutionPlan: """Create an execution plan for the given statement and parameters.""" @@ -65,13 +66,32 @@ def create_execution_plan( skip_parsing: bool = False, async_execution: bool = False, streaming: bool = False, + bulk_insert: bool = False, ) -> ExecutionPlan: """Create execution plan for fb_numeric parameter style.""" - query_params = self._build_fb_numeric_query_params( - parameters, streaming, async_execution - ) + if bulk_insert: + # Validate bulk_insert requirements + query_normalized = raw_query.lstrip().lower() + if not query_normalized.startswith("insert"): + raise ConfigurationError("bulk_insert is only supported for INSERT statements") + if ";" in raw_query.strip().rstrip(";"): + raise ConfigurationError("bulk_insert does not support multi-statement queries") + if not parameters: + raise ConfigurationError("bulk_insert requires at least one parameter set") + + # Prepare bulk insert query and parameters + processed_query, processed_params = self._prepare_bulk_insert(raw_query, parameters) + query_params = self._build_fb_numeric_query_params( + processed_params, streaming, async_execution, {"merge_prepared_statement_batches": "true"} + ) + else: + processed_query = raw_query + query_params = self._build_fb_numeric_query_params( + parameters, streaming, async_execution + ) + return ExecutionPlan( - queries=[raw_query], + queries=[processed_query], query_params=query_params, is_multi_statement=False, async_execution=async_execution, @@ -83,6 +103,7 @@ def _build_fb_numeric_query_params( parameters: Sequence[Sequence[ParameterType]], streaming: bool, async_execution: bool, + extra_params: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: """Build query parameters for fb_numeric style.""" param_list = parameters[0] if parameters else [] @@ -101,8 +122,35 @@ def _build_fb_numeric_query_params( query_params["query_parameters"] = json.dumps(query_parameters) if async_execution: query_params["async"] = True + if extra_params: + query_params.update(extra_params) return query_params + def _prepare_bulk_insert( + self, query: str, parameters_seq: Sequence[Sequence[ParameterType]] + ) -> tuple[str, Sequence[Sequence[ParameterType]]]: + """Execute multiple INSERT queries as a single batch.""" + if not parameters_seq: + raise ProgrammingError("bulk_insert requires at least one parameter set") + + # For bulk insert, we need to create unique parameter names for each INSERT + # Example: ($1, $2); ($3, $4); ($5, $6) instead of ($1, $2); ($1, $2); ($1, $2) + queries = [] + param_offset = 0 + for param_set in parameters_seq: + # Replace parameter placeholders with unique numbers + modified_query = query + for i in range(len(param_set)): + old_param = f"${i + 1}" + new_param = f"${param_offset + i + 1}" + modified_query = modified_query.replace(old_param, new_param) + queries.append(modified_query) + param_offset += len(param_set) + + combined_query = "; ".join(queries) + parameters = [param for param_set in parameters_seq for param in param_set] + return combined_query, [parameters] + class QmarkStatementPlanner(BaseStatementPlanner): """Statement planner for qmark parameter style.""" @@ -114,8 +162,13 @@ def create_execution_plan( skip_parsing: bool = False, async_execution: bool = False, streaming: bool = False, + bulk_insert: bool = False, ) -> ExecutionPlan: """Create execution plan for qmark parameter style.""" + # Validate bulk_insert is not used with qmark + if bulk_insert: + raise ConfigurationError("bulk_insert is only supported for fb_numeric") + queries: List[Union[SetParameter, str]] = ( [raw_query] if skip_parsing diff --git a/src/firebolt/db/cursor.py b/src/firebolt/db/cursor.py index f4ff48376b..fdbd5987e4 100644 --- a/src/firebolt/db/cursor.py +++ b/src/firebolt/db/cursor.py @@ -24,7 +24,6 @@ TimeoutException, codes, ) -from sqlparse import parse as parse_sql # type: ignore from firebolt.client import Client, ClientV1, ClientV2 from firebolt.common._types import ColType, ParameterType, SetParameter @@ -56,6 +55,7 @@ from firebolt.common.statement_formatter import create_statement_formatter from firebolt.utils.cache import ConnectionInfo, DatabaseInfo, EngineInfo from firebolt.utils.exception import ( + ConfigurationError, EngineNotRunningError, FireboltDatabaseError, FireboltError, @@ -225,6 +225,7 @@ def _do_execute( timeout: Optional[float] = None, async_execution: bool = False, streaming: bool = False, + bulk_insert: bool = False, ) -> None: self._close_rowset_and_reset() self._row_set = StreamingRowSet() if streaming else InMemoryRowSet() @@ -237,7 +238,7 @@ def _do_execute( ) plan = statement_planner.create_execution_plan( - raw_query, parameters, skip_parsing, async_execution, streaming + raw_query, parameters, skip_parsing, async_execution, streaming, bulk_insert ) self._execute_plan(plan, timeout) self._state = CursorState.DONE @@ -424,103 +425,9 @@ def executemany( Returns: int: Query row count. """ - if bulk_insert: - return self._executemany_bulk_insert(query, parameters_seq, timeout_seconds) - self._do_execute(query, parameters_seq, timeout=timeout_seconds) - return self.rowcount - - def _validate_bulk_insert_query(self, query: str) -> None: - """Validate that query is an INSERT statement for bulk_insert.""" - query_normalized = query.lstrip().lower() - - if not query_normalized.startswith("insert"): - raise ProgrammingError( - "bulk_insert is only supported for INSERT statements" - ) - - if ";" in query.strip().rstrip(";"): - raise ProgrammingError( - "bulk_insert does not support multi-statement queries" - ) - - def _executemany_bulk_insert( - self, - query: str, - parameters_seq: Sequence[Sequence[ParameterType]], - timeout_seconds: Optional[float], - ) -> int: - """Execute multiple INSERT queries as a single batch.""" - self._validate_bulk_insert_query(query) - - if not parameters_seq: - raise ProgrammingError("bulk_insert requires at least one parameter set") - - from firebolt.db import paramstyle - - try: - parameter_style = ParameterStyle(paramstyle) - except ValueError: - raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") - - concatenated_query = "; ".join([query] * len(parameters_seq)) - - self._close_rowset_and_reset() - self._row_set = InMemoryRowSet() - - try: - if parameter_style == ParameterStyle.FB_NUMERIC: - flattened_params: List[ParameterType] = [] - for param_set in parameters_seq: - flattened_params.extend(param_set) - - Cursor._log_query(concatenated_query) - timeout_controller = TimeoutController(timeout_seconds) - timeout_controller.raise_if_timeout() - - query_params = self._build_fb_numeric_query_params( - [flattened_params], - streaming=False, - async_execution=False, - extra_params={"merge_prepared_statement_batches": "true"}, - ) - - resp = self._api_request( - concatenated_query, - query_params, - timeout=timeout_controller.remaining(), - ) - self._raise_if_error(resp) - self._parse_response_headers(resp.headers) - self._append_row_set_from_response(resp) - else: - formatted_queries = [] - statements = parse_sql(query) - for param_set in parameters_seq: - formatted_query = self._formatter.format_statement( - statements[0], param_set - ) - formatted_queries.append(formatted_query) - - concatenated_query = "; ".join(formatted_queries) - - query_params = { - "output_format": self._get_output_format(False), - "merge_prepared_statement_batches": "true", - } - - Cursor._log_query(concatenated_query) - resp = self._api_request( - concatenated_query, query_params, timeout=timeout_seconds - ) - self._raise_if_error(resp) - self._parse_response_headers(resp.headers) - self._append_row_set_from_response(resp) - - self._state = CursorState.DONE - except Exception: - self._state = CursorState.ERROR - raise - + self._do_execute( + query, parameters_seq, timeout=timeout_seconds, bulk_insert=bulk_insert + ) return self.rowcount @check_not_closed diff --git a/tests/integration/dbapi/async/V2/test_queries_async.py b/tests/integration/dbapi/async/V2/test_queries_async.py index 383cdacc96..a3dd671377 100644 --- a/tests/integration/dbapi/async/V2/test_queries_async.py +++ b/tests/integration/dbapi/async/V2/test_queries_async.py @@ -283,13 +283,12 @@ async def test_parameterized_query_with_special_chars(connection: Connection) -> ], "Invalid data in table after parameterized insert" -@mark.parametrize("paramstyle", ["qmark", "fb_numeric"]) -async def test_executemany_bulk_insert(connection: Connection, paramstyle: str) -> None: +async def test_executemany_bulk_insert( + connection: Connection, fb_numeric_paramstyle: None +) -> None: """executemany with bulk_insert=True inserts data correctly.""" - original_paramstyle = firebolt.async_db.paramstyle - try: - firebolt.async_db.paramstyle = paramstyle + firebolt.async_db.paramstyle = "fb_numeric" async with connection.cursor() as c: await c.execute('DROP TABLE IF EXISTS "test_bulk_insert_async"') @@ -297,18 +296,11 @@ async def test_executemany_bulk_insert(connection: Connection, paramstyle: str) 'CREATE FACT TABLE "test_bulk_insert_async"(id int, name string) primary index id' ) - if paramstyle == "qmark": - await c.executemany( - 'INSERT INTO "test_bulk_insert_async" VALUES (?, ?)', - [(1, "alice"), (2, "bob"), (3, "charlie")], - bulk_insert=True, - ) - else: - await c.executemany( - 'INSERT INTO "test_bulk_insert_async" VALUES ($1, $2)', - [(1, "alice"), (2, "bob"), (3, "charlie")], - bulk_insert=True, - ) + await c.executemany( + 'INSERT INTO "test_bulk_insert_async" VALUES ($1, $2)', + [(1, "alice"), (2, "bob"), (3, "charlie")], + bulk_insert=True, + ) await c.execute('SELECT * FROM "test_bulk_insert_async" ORDER BY id') data = await c.fetchall() @@ -316,10 +308,9 @@ async def test_executemany_bulk_insert(connection: Connection, paramstyle: str) assert data[0] == [1, "alice"] assert data[1] == [2, "bob"] assert data[2] == [3, "charlie"] - - await c.execute('DROP TABLE "test_bulk_insert_async"') finally: - firebolt.async_db.paramstyle = original_paramstyle + async with connection.cursor() as c: + await c.execute('DROP TABLE IF EXISTS "test_bulk_insert_async"') async def test_multi_statement_query(connection: Connection) -> None: diff --git a/tests/integration/dbapi/sync/V2/test_queries.py b/tests/integration/dbapi/sync/V2/test_queries.py index c72ad59e05..577a62ff22 100644 --- a/tests/integration/dbapi/sync/V2/test_queries.py +++ b/tests/integration/dbapi/sync/V2/test_queries.py @@ -7,7 +7,6 @@ from pytest import mark, raises -import firebolt.db from firebolt.client.auth import Auth from firebolt.common._types import ColType from firebolt.common.row_set.types import Column @@ -284,32 +283,21 @@ def test_empty_query(c: Cursor, query: str, params: tuple) -> None: ) -@mark.parametrize("paramstyle", ["qmark", "fb_numeric"]) -def test_executemany_bulk_insert(connection: Connection, paramstyle: str) -> None: +def test_executemany_bulk_insert( + connection: Connection, fb_numeric_paramstyle: None +) -> None: """executemany with bulk_insert=True inserts data correctly.""" - original_paramstyle = firebolt.db.paramstyle - try: - firebolt.db.paramstyle = paramstyle - with connection.cursor() as c: c.execute('DROP TABLE IF EXISTS "test_bulk_insert"') c.execute( 'CREATE FACT TABLE "test_bulk_insert"(id int, name string) primary index id' ) - - if paramstyle == "qmark": - c.executemany( - 'INSERT INTO "test_bulk_insert" VALUES (?, ?)', - [(1, "alice"), (2, "bob"), (3, "charlie")], - bulk_insert=True, - ) - else: - c.executemany( - 'INSERT INTO "test_bulk_insert" VALUES ($1, $2)', - [(1, "alice"), (2, "bob"), (3, "charlie")], - bulk_insert=True, - ) + c.executemany( + 'INSERT INTO "test_bulk_insert" VALUES ($1, $2)', + [(1, "alice"), (2, "bob"), (3, "charlie")], + bulk_insert=True, + ) c.execute('SELECT * FROM "test_bulk_insert" ORDER BY id') data = c.fetchall() @@ -317,10 +305,9 @@ def test_executemany_bulk_insert(connection: Connection, paramstyle: str) -> Non assert data[0] == [1, "alice"] assert data[1] == [2, "bob"] assert data[2] == [3, "charlie"] - - c.execute('DROP TABLE "test_bulk_insert"') finally: - firebolt.db.paramstyle = original_paramstyle + with connection.cursor() as c: + c.execute('DROP TABLE IF EXISTS "test_bulk_insert"') def test_multi_statement_query(connection: Connection) -> None: diff --git a/tests/unit/async_db/test_cursor.py b/tests/unit/async_db/test_cursor.py index dfc3b7918f..d13841c514 100644 --- a/tests/unit/async_db/test_cursor.py +++ b/tests/unit/async_db/test_cursor.py @@ -1508,49 +1508,19 @@ async def test_unsupported_paramstyle_raises(cursor: Cursor) -> None: db.paramstyle = original_paramstyle -async def test_executemany_bulk_insert_qmark( - httpx_mock: HTTPXMock, +async def test_executemany_bulk_insert_qmark_fails( cursor: Cursor, - query_url: str, - python_query_data: List[List[ColType]], ): - """executemany with bulk_insert=True concatenates INSERT queries with QMARK style.""" - - def bulk_insert_callback(request): - assert request.url.params.get("merge_prepared_statement_batches") == "true" - - query = request.content.decode() - assert query.count("INSERT INTO") == 3 - assert "; " in query - - return Response( - status_code=200, - content=json.dumps( - { - "meta": [], - "data": [], - "rows": 0, - "statistics": { - "elapsed": 0.0, - "rows_read": 0, - "bytes_read": 0, - }, - } - ), - headers={}, + """executemany with bulk_insert=True fails with qmark paramstyle.""" + with raises( + ConfigurationError, match="bulk_insert is only supported for fb_numeric" + ): + await cursor.executemany( + "INSERT INTO test_table VALUES (?, ?)", + [(1, "a"), (2, "b"), (3, "c")], + bulk_insert=True, ) - base_url = str(query_url).split("?")[0] - url_pattern = re.compile(re.escape(base_url)) - httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) - - result = await cursor.executemany( - "INSERT INTO test_table VALUES (?, ?)", - [(1, "a"), (2, "b"), (3, "c")], - bulk_insert=True, - ) - assert result == 0 - async def test_executemany_bulk_insert_fb_numeric( httpx_mock: HTTPXMock, @@ -1566,8 +1536,6 @@ async def test_executemany_bulk_insert_fb_numeric( db_module.paramstyle = "fb_numeric" def bulk_insert_callback(request): - assert request.url.params.get("merge_prepared_statement_batches") == "true" - query = request.content.decode() assert query.count("INSERT INTO") == 3 assert "; " in query @@ -1608,45 +1576,62 @@ def bulk_insert_callback(request): db_module.paramstyle = original_paramstyle -async def test_executemany_bulk_insert_non_insert_fails(cursor: Cursor): +async def test_executemany_bulk_insert_non_insert_fails( + cursor: Cursor, fb_numeric_paramstyle +): """executemany with bulk_insert=True fails for non-INSERT queries.""" - with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + with raises(ConfigurationError, match="bulk_insert is only supported for INSERT"): await cursor.executemany( "SELECT * FROM test_table", [()], bulk_insert=True, ) - with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + with raises(ConfigurationError, match="bulk_insert is only supported for INSERT"): await cursor.executemany( - "UPDATE test_table SET col = ?", + "UPDATE test_table SET col = $1", [(1,)], bulk_insert=True, ) - with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + with raises(ConfigurationError, match="bulk_insert is only supported for INSERT"): await cursor.executemany( - "DELETE FROM test_table WHERE id = ?", + "DELETE FROM test_table WHERE id = $1", [(1,)], bulk_insert=True, ) -async def test_executemany_bulk_insert_multi_statement_fails(cursor: Cursor): +async def test_executemany_bulk_insert_multi_statement_fails( + cursor: Cursor, fb_numeric_paramstyle +): """executemany with bulk_insert=True fails for multi-statement queries.""" - with raises(ProgrammingError, match="does not support multi-statement"): + with raises( + ProgrammingError, match="bulk_insert does not support multi-statement queries" + ): await cursor.executemany( - "INSERT INTO t1 VALUES (?); INSERT INTO t2 VALUES (?)", + "INSERT INTO test_table VALUES ($1); SELECT * FROM test_table", + [(1,)], + bulk_insert=True, + ) + + with raises( + ProgrammingError, match="bulk_insert does not support multi-statement queries" + ): + await cursor.executemany( + "INSERT INTO test_table VALUES ($1); INSERT INTO test_table VALUES ($2)", [(1,), (2,)], bulk_insert=True, ) -async def test_executemany_bulk_insert_empty_params_fails(cursor: Cursor): +async def test_executemany_bulk_insert_empty_params_fails( + cursor: Cursor, fb_numeric_paramstyle +): """executemany with bulk_insert=True fails with empty parameters.""" with raises(ProgrammingError, match="requires at least one parameter set"): await cursor.executemany( - "INSERT INTO test_table VALUES (?)", + "INSERT INTO test_table VALUES ($1)", [], bulk_insert=True, ) diff --git a/tests/unit/db/test_cursor.py b/tests/unit/db/test_cursor.py index 079f3ae84a..c3bdbc74c9 100644 --- a/tests/unit/db/test_cursor.py +++ b/tests/unit/db/test_cursor.py @@ -1393,49 +1393,19 @@ def test_unsupported_paramstyle_raises(cursor): db.paramstyle = original_paramstyle -def test_executemany_bulk_insert_qmark( - httpx_mock: HTTPXMock, +def test_executemany_bulk_insert_qmark_fails( cursor: Cursor, - query_url: str, - python_query_data: List[List[ColType]], ): - """executemany with bulk_insert=True concatenates INSERT queries with QMARK style.""" - - def bulk_insert_callback(request): - assert request.url.params.get("merge_prepared_statement_batches") == "true" - - query = request.content.decode() - assert query.count("INSERT INTO") == 3 - assert "; " in query - - return Response( - status_code=200, - content=json.dumps( - { - "meta": [], - "data": [], - "rows": 0, - "statistics": { - "elapsed": 0.0, - "rows_read": 0, - "bytes_read": 0, - }, - } - ), - headers={}, + """executemany with bulk_insert=True fails with qmark paramstyle.""" + with raises( + ConfigurationError, match="bulk_insert is only supported for fb_numeric" + ): + cursor.executemany( + "INSERT INTO test_table VALUES (?, ?)", + [(1, "a"), (2, "b"), (3, "c")], + bulk_insert=True, ) - base_url = str(query_url).split("?")[0] - url_pattern = re.compile(re.escape(base_url)) - httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) - - result = cursor.executemany( - "INSERT INTO test_table VALUES (?, ?)", - [(1, "a"), (2, "b"), (3, "c")], - bulk_insert=True, - ) - assert result == 0 - def test_executemany_bulk_insert_fb_numeric( httpx_mock: HTTPXMock, @@ -1451,8 +1421,6 @@ def test_executemany_bulk_insert_fb_numeric( db_module.paramstyle = "fb_numeric" def bulk_insert_callback(request): - assert request.url.params.get("merge_prepared_statement_batches") == "true" - query = request.content.decode() assert query.count("INSERT INTO") == 3 assert "; " in query @@ -1493,45 +1461,64 @@ def bulk_insert_callback(request): db_module.paramstyle = original_paramstyle -def test_executemany_bulk_insert_non_insert_fails(cursor: Cursor): +def test_executemany_bulk_insert_non_insert_fails( + cursor: Cursor, fb_numeric_paramstyle +): """executemany with bulk_insert=True fails for non-INSERT queries.""" - with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + with raises(ConfigurationError, match="bulk_insert is only supported for INSERT"): cursor.executemany( "SELECT * FROM test_table", [()], bulk_insert=True, ) - with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + with raises(ConfigurationError, match="bulk_insert is only supported for INSERT"): cursor.executemany( - "UPDATE test_table SET col = ?", + "UPDATE test_table SET col = $1", [(1,)], bulk_insert=True, ) - with raises(ProgrammingError, match="bulk_insert is only supported for INSERT"): + with raises(ConfigurationError, match="bulk_insert is only supported for INSERT"): cursor.executemany( - "DELETE FROM test_table WHERE id = ?", + "DELETE FROM test_table WHERE id = $1", [(1,)], bulk_insert=True, ) -def test_executemany_bulk_insert_multi_statement_fails(cursor: Cursor): +def test_executemany_bulk_insert_multi_statement_fails( + cursor: Cursor, fb_numeric_paramstyle +): """executemany with bulk_insert=True fails for multi-statement queries.""" - with raises(ProgrammingError, match="does not support multi-statement"): + with raises( + ProgrammingError, match="bulk_insert does not support multi-statement queries" + ): cursor.executemany( - "INSERT INTO t1 VALUES (?); INSERT INTO t2 VALUES (?)", + "INSERT INTO test_table VALUES ($1); SELECT * FROM test_table", + [(1,)], + bulk_insert=True, + ) + + with raises( + ProgrammingError, match="bulk_insert does not support multi-statement queries" + ): + cursor.executemany( + "INSERT INTO test_table VALUES ($1); INSERT INTO test_table VALUES ($2)", [(1,), (2,)], bulk_insert=True, ) -def test_executemany_bulk_insert_empty_params_fails(cursor: Cursor): +def test_executemany_bulk_insert_empty_params_fails( + cursor: Cursor, fb_numeric_paramstyle +): """executemany with bulk_insert=True fails with empty parameters.""" - with raises(ProgrammingError, match="requires at least one parameter set"): + with raises( + ProgrammingError, match="bulk_insert requires at least one parameter set" + ): cursor.executemany( - "INSERT INTO test_table VALUES (?)", + "INSERT INTO test_table VALUES ($1)", [], bulk_insert=True, ) From 90d0df097dcd1052b1f676d42ec176293ca791d2 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Mon, 13 Oct 2025 17:40:24 +0100 Subject: [PATCH 15/21] refactor for complexity --- src/firebolt/async_db/cursor.py | 1 + src/firebolt/db/cursor.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/firebolt/async_db/cursor.py b/src/firebolt/async_db/cursor.py index da1c108723..ddb43df49f 100644 --- a/src/firebolt/async_db/cursor.py +++ b/src/firebolt/async_db/cursor.py @@ -223,6 +223,7 @@ async def _do_execute( ) -> None: await self._close_rowset_and_reset() self._row_set = StreamingAsyncRowSet() if streaming else InMemoryAsyncRowSet() + # Import paramstyle from module level from firebolt.async_db import paramstyle diff --git a/src/firebolt/db/cursor.py b/src/firebolt/db/cursor.py index fdbd5987e4..df9fbd1308 100644 --- a/src/firebolt/db/cursor.py +++ b/src/firebolt/db/cursor.py @@ -229,6 +229,7 @@ def _do_execute( ) -> None: self._close_rowset_and_reset() self._row_set = StreamingRowSet() if streaming else InMemoryRowSet() + # Import paramstyle from module level from firebolt.db import paramstyle From 018b9bbbaab3de07d13c2e6378cd9dd35b107867 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 13 Oct 2025 16:56:34 +0000 Subject: [PATCH 16/21] docs: clarify bulk_insert only supports fb_numeric parameter style - Remove QMARK parameter style example from bulk_insert documentation - Update note to explicitly state bulk_insert requires fb_numeric - Clarify that using other parameter styles will raise an error Addresses PR feedback from ptiurin on #463 Co-Authored-By: petro.tiurin@firebolt.io --- docsrc/Connecting_and_queries.rst | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/docsrc/Connecting_and_queries.rst b/docsrc/Connecting_and_queries.rst index 5d1624da4e..096fd96ca3 100644 --- a/docsrc/Connecting_and_queries.rst +++ b/docsrc/Connecting_and_queries.rst @@ -444,26 +444,9 @@ For inserting large amounts of data more efficiently, you can use the ``bulk_ins with ``executemany()``. This concatenates multiple INSERT statements into a single batch request, which can significantly improve performance when inserting many rows. -**Note:** The ``bulk_insert`` parameter only works with INSERT statements. Using it with other -statement types (SELECT, UPDATE, DELETE, etc.) will raise an error. - -Example with QMARK parameter style:: - - import firebolt.db - # Explicitly set paramstyle to "qmark" for QMARK style - firebolt.db.paramstyle = "qmark" - - cursor.executemany( - "INSERT INTO test_table VALUES (?, ?, ?)", - ( - (1, "apple", "2019-01-01"), - (2, "banana", "2020-01-01"), - (3, "carrot", "2021-01-01"), - (4, "donut", "2022-01-01"), - (5, "eggplant", "2023-01-01") - ), - bulk_insert=True # Enable bulk insert for better performance (important-comment) - ) +**Note:** The ``bulk_insert`` parameter only works with INSERT statements and requires the +``fb_numeric`` parameter style. Using it with other statement types or parameter styles will +raise an error. Example with FB_NUMERIC parameter style:: From 97b5c56b687424cc46de149901f992c32bf57955 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Tue, 14 Oct 2025 17:23:48 +0100 Subject: [PATCH 17/21] improve the change and rebase --- docsrc/Connecting_and_queries.rst | 3 +- src/firebolt/async_db/cursor.py | 8 +- src/firebolt/common/cursor/base_cursor.py | 52 +---- .../common/cursor/statement_planners.py | 202 ++++++++++++++---- src/firebolt/db/cursor.py | 8 +- tests/unit/async_db/test_cursor.py | 46 +++- .../common/cursor/test_statement_planners.py | 36 ++++ tests/unit/db/test_cursor.py | 46 +++- 8 files changed, 284 insertions(+), 117 deletions(-) diff --git a/docsrc/Connecting_and_queries.rst b/docsrc/Connecting_and_queries.rst index 096fd96ca3..d503183884 100644 --- a/docsrc/Connecting_and_queries.rst +++ b/docsrc/Connecting_and_queries.rst @@ -469,8 +469,7 @@ Example with FB_NUMERIC parameter style:: cursor.close() When ``bulk_insert=True``, the SDK concatenates all INSERT statements into a single batch -and sends them to the server with the ``merge_prepared_statement_batches=true`` parameter, -allowing for optimized batch processing. +and sends them to the server for optimized batch processing. Setting session parameters diff --git a/src/firebolt/async_db/cursor.py b/src/firebolt/async_db/cursor.py index ddb43df49f..f4cc2ebc94 100644 --- a/src/firebolt/async_db/cursor.py +++ b/src/firebolt/async_db/cursor.py @@ -46,7 +46,6 @@ from firebolt.common.row_set.asynchronous.streaming import StreamingAsyncRowSet from firebolt.common.statement_formatter import create_statement_formatter from firebolt.utils.exception import ( - ConfigurationError, EngineNotRunningError, FireboltDatabaseError, FireboltError, @@ -229,11 +228,11 @@ async def _do_execute( try: statement_planner = StatementPlannerFactory.create_planner( - paramstyle, self._formatter + paramstyle, self._formatter, bulk_insert ) plan = statement_planner.create_execution_plan( - raw_query, parameters, skip_parsing, async_execution, streaming, bulk_insert + raw_query, parameters, skip_parsing, async_execution, streaming ) await self._execute_plan(plan, timeout) self._state = CursorState.DONE @@ -407,8 +406,7 @@ async def executemany( cursor object until it's closed. They can also be removed with `flush_parameters` method call. Bulk insert: When bulk_insert=True, multiple INSERT queries are - concatenated and sent as a single batch with - merge_prepared_statement_batches=true for improved performance. + concatenated and sent as a single batch for improved performance. Only supported for INSERT statements. Args: diff --git a/src/firebolt/common/cursor/base_cursor.py b/src/firebolt/common/cursor/base_cursor.py index 0b7b1f6fae..8493788642 100644 --- a/src/firebolt/common/cursor/base_cursor.py +++ b/src/firebolt/common/cursor/base_cursor.py @@ -3,13 +3,13 @@ import logging import re from types import TracebackType -from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Tuple, Type, Union from httpx import URL, Response from firebolt.client.auth.base import Auth from firebolt.client.client import AsyncClient, Client -from firebolt.common._types import ParameterType, RawColType, SetParameter +from firebolt.common._types import RawColType, SetParameter from firebolt.common.constants import ( DISALLOWED_PARAMETER_LIST, IMMUTABLE_PARAMETER_LIST, @@ -27,11 +27,7 @@ SecureCacheKey, _firebolt_cache, ) -from firebolt.utils.exception import ( - ConfigurationError, - FireboltError, - ProgrammingError, -) +from firebolt.utils.exception import ConfigurationError, FireboltError from firebolt.utils.util import fix_url_schema logger = logging.getLogger(__name__) @@ -240,6 +236,7 @@ def _log_query(query: Union[str, SetParameter]) -> None: "aws_key_id|credentials", query, flags=re.IGNORECASE ): logger.debug(f"Running query: {query}") + @property def engine_name(self) -> str: """ @@ -320,44 +317,3 @@ def set_cache_record(self, record: ConnectionInfo) -> None: self._client.auth.secret, ) _firebolt_cache.set(cache_key, record) - - def _validate_bulk_insert_query(self, query: str) -> None: - """Validate that query is an INSERT statement for bulk_insert.""" - query_normalized = query.lstrip().lower() - - if not query_normalized.startswith("insert"): - raise ConfigurationError( - "bulk_insert is only supported for INSERT statements" - ) - - if ";" in query.strip().rstrip(";"): - raise ProgrammingError( - "bulk_insert does not support multi-statement queries" - ) - - def _prepare_bulk_insert( - self, query: str, parameters_seq: Sequence[Sequence[ParameterType]] - ) -> tuple[str, Sequence[Sequence[ParameterType]]]: - """Execute multiple INSERT queries as a single batch.""" - self._validate_bulk_insert_query(query) - - if not parameters_seq: - raise ProgrammingError("bulk_insert requires at least one parameter set") - - # For bulk insert, we need to create unique parameter names for each INSERT - # Example: ($1, $2); ($3, $4); ($5, $6) instead of ($1, $2); ($1, $2); ($1, $2) - queries = [] - param_offset = 0 - for param_set in parameters_seq: - # Replace parameter placeholders with unique numbers - modified_query = query - for i in range(len(param_set)): - old_param = f"${i + 1}" - new_param = f"${param_offset + i + 1}" - modified_query = modified_query.replace(old_param, new_param) - queries.append(modified_query) - param_offset += len(param_set) - - combined_query = "; ".join(queries) - parameters = [param for param_set in parameters_seq for param in param_set] - return combined_query, [parameters] diff --git a/src/firebolt/common/cursor/statement_planners.py b/src/firebolt/common/cursor/statement_planners.py index b7f44cddb6..b06038295b 100644 --- a/src/firebolt/common/cursor/statement_planners.py +++ b/src/firebolt/common/cursor/statement_planners.py @@ -12,7 +12,11 @@ JSON_LINES_OUTPUT_FORMAT, JSON_OUTPUT_FORMAT, ) -from firebolt.utils.exception import ConfigurationError, FireboltError, ProgrammingError +from firebolt.utils.exception import ( + ConfigurationError, + FireboltError, + ProgrammingError, +) if TYPE_CHECKING: from firebolt.common.statement_formatter import StatementFormatter @@ -29,6 +33,69 @@ class ExecutionPlan: streaming: bool = False +class BulkInsertMixin: + """Mixin class for bulk insert functionality.""" + + def create_execution_plan( + self, + raw_query: str, + parameters: Sequence[Sequence[ParameterType]], + skip_parsing: bool = False, + async_execution: bool = False, + streaming: bool = False, + ) -> ExecutionPlan: + """Create execution plan for bulk insert operations.""" + return self._create_bulk_execution_plan( + raw_query, parameters, async_execution, streaming + ) + + def _validate_bulk_insert_query(self, query: str) -> None: + """Validate that query is an INSERT statement for bulk_insert.""" + query_normalized = query.lstrip().lower() + if not query_normalized.startswith("insert"): + raise ConfigurationError( + "bulk_insert is only supported for INSERT statements" + ) + if ";" in query.strip().rstrip(";"): + raise ProgrammingError( + "bulk_insert does not support multi-statement queries" + ) + + def _create_bulk_execution_plan( + self, + raw_query: str, + parameters: Sequence[Sequence[ParameterType]], + async_execution: bool, + streaming: bool, + ) -> ExecutionPlan: + """ + Create bulk execution plan by delegating to + parameter-style specific methods. + """ + # Validate bulk_insert requirements + self._validate_bulk_insert_query(raw_query) + if not parameters: + raise ProgrammingError("bulk_insert requires at least one parameter set") + + # Call the parameter-style specific bulk creation method + return self._create_bulk_plan_impl( + raw_query, parameters, async_execution, streaming + ) + + def _create_bulk_plan_impl( + self, + raw_query: str, + parameters: Sequence[Sequence[ParameterType]], + async_execution: bool, + streaming: bool, + ) -> ExecutionPlan: + """ + Override in subclasses to provide parameter-style + specific bulk implementation. + """ + raise NotImplementedError("Subclass must implement _create_bulk_plan_impl") + + class BaseStatementPlanner(ABC): """Base class for statement planning handlers.""" @@ -44,7 +111,6 @@ def create_execution_plan( skip_parsing: bool = False, async_execution: bool = False, streaming: bool = False, - bulk_insert: bool = False, ) -> ExecutionPlan: """Create an execution plan for the given statement and parameters.""" @@ -66,32 +132,14 @@ def create_execution_plan( skip_parsing: bool = False, async_execution: bool = False, streaming: bool = False, - bulk_insert: bool = False, ) -> ExecutionPlan: """Create execution plan for fb_numeric parameter style.""" - if bulk_insert: - # Validate bulk_insert requirements - query_normalized = raw_query.lstrip().lower() - if not query_normalized.startswith("insert"): - raise ConfigurationError("bulk_insert is only supported for INSERT statements") - if ";" in raw_query.strip().rstrip(";"): - raise ConfigurationError("bulk_insert does not support multi-statement queries") - if not parameters: - raise ConfigurationError("bulk_insert requires at least one parameter set") - - # Prepare bulk insert query and parameters - processed_query, processed_params = self._prepare_bulk_insert(raw_query, parameters) - query_params = self._build_fb_numeric_query_params( - processed_params, streaming, async_execution, {"merge_prepared_statement_batches": "true"} - ) - else: - processed_query = raw_query - query_params = self._build_fb_numeric_query_params( - parameters, streaming, async_execution - ) - + query_params = self._build_fb_numeric_query_params( + parameters, streaming, async_execution + ) + return ExecutionPlan( - queries=[processed_query], + queries=[raw_query], query_params=query_params, is_multi_statement=False, async_execution=async_execution, @@ -126,13 +174,40 @@ def _build_fb_numeric_query_params( query_params.update(extra_params) return query_params - def _prepare_bulk_insert( + +class FbNumericBulkStatementPlanner(BulkInsertMixin, FbNumericStatementPlanner): + """Statement planner for fb_numeric parameter style with bulk insert support.""" + + def _create_bulk_plan_impl( + self, + raw_query: str, + parameters: Sequence[Sequence[ParameterType]], + async_execution: bool, + streaming: bool, + ) -> ExecutionPlan: + """Create bulk insert execution plan for fb_numeric parameter style.""" + # Prepare bulk insert query and parameters for fb_numeric + processed_query, processed_params = self._prepare_fb_numeric_bulk_insert( + raw_query, parameters + ) + + # Build query parameters for bulk insert + query_params = self._build_fb_numeric_query_params( + processed_params, streaming, async_execution + ) + + return ExecutionPlan( + queries=[processed_query], + query_params=query_params, + is_multi_statement=False, + async_execution=async_execution, + streaming=streaming, + ) + + def _prepare_fb_numeric_bulk_insert( self, query: str, parameters_seq: Sequence[Sequence[ParameterType]] ) -> tuple[str, Sequence[Sequence[ParameterType]]]: - """Execute multiple INSERT queries as a single batch.""" - if not parameters_seq: - raise ProgrammingError("bulk_insert requires at least one parameter set") - + """Prepare multiple INSERT queries as a single batch for fb_numeric style.""" # For bulk insert, we need to create unique parameter names for each INSERT # Example: ($1, $2); ($3, $4); ($5, $6) instead of ($1, $2); ($1, $2); ($1, $2) queries = [] @@ -148,8 +223,10 @@ def _prepare_bulk_insert( param_offset += len(param_set) combined_query = "; ".join(queries) - parameters = [param for param_set in parameters_seq for param in param_set] - return combined_query, [parameters] + flattened_parameters = [ + param for param_set in parameters_seq for param in param_set + ] + return combined_query, [flattened_parameters] class QmarkStatementPlanner(BaseStatementPlanner): @@ -162,13 +239,8 @@ def create_execution_plan( skip_parsing: bool = False, async_execution: bool = False, streaming: bool = False, - bulk_insert: bool = False, ) -> ExecutionPlan: """Create execution plan for qmark parameter style.""" - # Validate bulk_insert is not used with qmark - if bulk_insert: - raise ConfigurationError("bulk_insert is only supported for fb_numeric") - queries: List[Union[SetParameter, str]] = ( [raw_query] if skip_parsing @@ -196,6 +268,48 @@ def create_execution_plan( ) +class QmarkBulkStatementPlanner(BulkInsertMixin, QmarkStatementPlanner): + """Statement planner for qmark parameter style with bulk insert support.""" + + def _create_bulk_plan_impl( + self, + raw_query: str, + parameters: Sequence[Sequence[ParameterType]], + async_execution: bool, + streaming: bool, + ) -> ExecutionPlan: + """Create bulk insert execution plan for qmark parameter style.""" + # Import needed modules + from sqlparse import parse as parse_sql # type: ignore + + # Prepare bulk insert query for qmark style + statements = parse_sql(raw_query) + if not statements: + raise ProgrammingError("Invalid SQL query for bulk insert") + + formatted_queries = [] + for param_set in parameters: + formatted_query = self.formatter.format_statement(statements[0], param_set) + formatted_queries.append(formatted_query) + + combined_query = "; ".join(formatted_queries) + + # Build query parameters for bulk insert + query_params: Dict[str, Any] = { + "output_format": self._get_output_format(streaming), + } + if async_execution: + query_params["async"] = True + + return ExecutionPlan( + queries=[combined_query], + query_params=query_params, + is_multi_statement=False, + async_execution=async_execution, + streaming=streaming, + ) + + class StatementPlannerFactory: """Factory for creating statement planner instances based on paramstyle.""" @@ -204,15 +318,21 @@ class StatementPlannerFactory: "qmark": QmarkStatementPlanner, } + _BULK_PLANNER_CLASSES = { + "fb_numeric": FbNumericBulkStatementPlanner, + "qmark": QmarkBulkStatementPlanner, + } + @classmethod def create_planner( - cls, paramstyle: str, formatter: StatementFormatter + cls, paramstyle: str, formatter: StatementFormatter, bulk_insert: bool = False ) -> BaseStatementPlanner: """Create a statement planner instance for the given paramstyle. Args: paramstyle: The parameter style ('fb_numeric' or 'qmark') formatter: StatementFormatter instance for statement processing + bulk_insert: Whether to create a bulk-capable planner Returns: Appropriate statement planner instance @@ -220,7 +340,11 @@ def create_planner( Raises: ProgrammingError: If paramstyle is not supported """ - planner_class = cls._PLANNER_CLASSES.get(paramstyle) + planner_classes = ( + cls._BULK_PLANNER_CLASSES if bulk_insert else cls._PLANNER_CLASSES + ) + planner_class = planner_classes.get(paramstyle) + if planner_class is None: raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") diff --git a/src/firebolt/db/cursor.py b/src/firebolt/db/cursor.py index df9fbd1308..d14a077051 100644 --- a/src/firebolt/db/cursor.py +++ b/src/firebolt/db/cursor.py @@ -55,7 +55,6 @@ from firebolt.common.statement_formatter import create_statement_formatter from firebolt.utils.cache import ConnectionInfo, DatabaseInfo, EngineInfo from firebolt.utils.exception import ( - ConfigurationError, EngineNotRunningError, FireboltDatabaseError, FireboltError, @@ -235,11 +234,11 @@ def _do_execute( try: statement_planner = StatementPlannerFactory.create_planner( - paramstyle, self._formatter + paramstyle, self._formatter, bulk_insert ) plan = statement_planner.create_execution_plan( - raw_query, parameters, skip_parsing, async_execution, streaming, bulk_insert + raw_query, parameters, skip_parsing, async_execution, streaming ) self._execute_plan(plan, timeout) self._state = CursorState.DONE @@ -409,8 +408,7 @@ def executemany( cursor object until it's closed. They can also be removed with `flush_parameters` method call. Bulk insert: When bulk_insert=True, multiple INSERT queries are - concatenated and sent as a single batch with - merge_prepared_statement_batches=true for improved performance. + concatenated and sent as a single batch for improved performance. Only supported for INSERT statements. Args: diff --git a/tests/unit/async_db/test_cursor.py b/tests/unit/async_db/test_cursor.py index d13841c514..f7bb296671 100644 --- a/tests/unit/async_db/test_cursor.py +++ b/tests/unit/async_db/test_cursor.py @@ -1508,19 +1508,47 @@ async def test_unsupported_paramstyle_raises(cursor: Cursor) -> None: db.paramstyle = original_paramstyle -async def test_executemany_bulk_insert_qmark_fails( +async def test_executemany_bulk_insert_qmark_works( + httpx_mock: HTTPXMock, cursor: Cursor, + query_url: str, ): - """executemany with bulk_insert=True fails with qmark paramstyle.""" - with raises( - ConfigurationError, match="bulk_insert is only supported for fb_numeric" - ): - await cursor.executemany( - "INSERT INTO test_table VALUES (?, ?)", - [(1, "a"), (2, "b"), (3, "c")], - bulk_insert=True, + """executemany with bulk_insert=True works with qmark paramstyle.""" + + def bulk_insert_callback(request): + query = request.content.decode() + # Should contain multiple INSERT statements + assert query.count("INSERT INTO") == 3 + assert "; " in query + + return Response( + status_code=200, + content=json.dumps( + { + "meta": [], + "data": [], + "rows": 0, + "statistics": { + "elapsed": 0.0, + "rows_read": 0, + "bytes_read": 0, + }, + } + ), + headers={}, ) + base_url = str(query_url).split("?")[0] + url_pattern = re.compile(re.escape(base_url)) + httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) + + result = await cursor.executemany( + "INSERT INTO test_table VALUES (?, ?)", + [(1, "a"), (2, "b"), (3, "c")], + bulk_insert=True, + ) + assert result == 0 + async def test_executemany_bulk_insert_fb_numeric( httpx_mock: HTTPXMock, diff --git a/tests/unit/common/cursor/test_statement_planners.py b/tests/unit/common/cursor/test_statement_planners.py index 368fd51c6d..630ac58575 100644 --- a/tests/unit/common/cursor/test_statement_planners.py +++ b/tests/unit/common/cursor/test_statement_planners.py @@ -12,6 +12,7 @@ ) from firebolt.common.cursor.statement_planners import ( BaseStatementPlanner, + BulkInsertMixin, ExecutionPlan, FbNumericStatementPlanner, QmarkStatementPlanner, @@ -304,6 +305,41 @@ def test_statement_planner_factory_unsupported_paramstyle(formatter): StatementPlannerFactory.create_planner("unsupported", formatter) +@pytest.mark.parametrize( + "paramstyle,expected_class", + [ + ("fb_numeric", "FbNumericBulkStatementPlanner"), + ("qmark", "QmarkBulkStatementPlanner"), + ], +) +def test_statement_planner_factory_creates_correct_bulk_planners( + formatter, paramstyle, expected_class +): + """Test that factory creates correct bulk planner types.""" + planner = StatementPlannerFactory.create_planner( + paramstyle, formatter, bulk_insert=True + ) + + assert planner.__class__.__name__ == expected_class + assert planner.formatter == formatter + + +def test_statement_planner_factory_bulk_planner_inheritance(formatter): + """Test that bulk planners inherit from regular planners.""" + fb_bulk_planner = StatementPlannerFactory.create_planner( + "fb_numeric", formatter, bulk_insert=True + ) + qmark_bulk_planner = StatementPlannerFactory.create_planner( + "qmark", formatter, bulk_insert=True + ) + + # Check that bulk planners inherit from their regular counterparts and the mixin + assert isinstance(fb_bulk_planner, FbNumericStatementPlanner) + assert isinstance(fb_bulk_planner, BulkInsertMixin) + assert isinstance(qmark_bulk_planner, QmarkStatementPlanner) + assert isinstance(qmark_bulk_planner, BulkInsertMixin) + + # Edge cases and error conditions def test_fb_numeric_empty_parameters_list(formatter): """Test fb_numeric with empty parameters list.""" diff --git a/tests/unit/db/test_cursor.py b/tests/unit/db/test_cursor.py index c3bdbc74c9..ceba19a174 100644 --- a/tests/unit/db/test_cursor.py +++ b/tests/unit/db/test_cursor.py @@ -1393,19 +1393,47 @@ def test_unsupported_paramstyle_raises(cursor): db.paramstyle = original_paramstyle -def test_executemany_bulk_insert_qmark_fails( +def test_executemany_bulk_insert_qmark_works( + httpx_mock: HTTPXMock, cursor: Cursor, + query_url: str, ): - """executemany with bulk_insert=True fails with qmark paramstyle.""" - with raises( - ConfigurationError, match="bulk_insert is only supported for fb_numeric" - ): - cursor.executemany( - "INSERT INTO test_table VALUES (?, ?)", - [(1, "a"), (2, "b"), (3, "c")], - bulk_insert=True, + """executemany with bulk_insert=True works with qmark paramstyle.""" + + def bulk_insert_callback(request): + query = request.content.decode() + # Should contain multiple INSERT statements + assert query.count("INSERT INTO") == 3 + assert "; " in query + + return Response( + status_code=200, + content=json.dumps( + { + "meta": [], + "data": [], + "rows": 0, + "statistics": { + "elapsed": 0.0, + "rows_read": 0, + "bytes_read": 0, + }, + } + ), + headers={}, ) + base_url = str(query_url).split("?")[0] + url_pattern = re.compile(re.escape(base_url)) + httpx_mock.add_callback(bulk_insert_callback, url=url_pattern) + + result = cursor.executemany( + "INSERT INTO test_table VALUES (?, ?)", + [(1, "a"), (2, "b"), (3, "c")], + bulk_insert=True, + ) + assert result == 0 + def test_executemany_bulk_insert_fb_numeric( httpx_mock: HTTPXMock, From e561be24a30cce6ee642d3606c82563fd7822e33 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Wed, 15 Oct 2025 11:53:37 +0100 Subject: [PATCH 18/21] add integration tests --- .../dbapi/async/V2/test_queries_async.py | 84 ++++++++++++++----- tests/integration/dbapi/conftest.py | 6 +- .../integration/dbapi/sync/V2/test_queries.py | 84 ++++++++++++++----- 3 files changed, 128 insertions(+), 46 deletions(-) diff --git a/tests/integration/dbapi/async/V2/test_queries_async.py b/tests/integration/dbapi/async/V2/test_queries_async.py index a3dd671377..27d4186250 100644 --- a/tests/integration/dbapi/async/V2/test_queries_async.py +++ b/tests/integration/dbapi/async/V2/test_queries_async.py @@ -4,6 +4,7 @@ from random import randint from typing import Callable, List, Tuple +import trio from pytest import mark, raises import firebolt.async_db @@ -283,34 +284,73 @@ async def test_parameterized_query_with_special_chars(connection: Connection) -> ], "Invalid data in table after parameterized insert" -async def test_executemany_bulk_insert( - connection: Connection, fb_numeric_paramstyle: None +@mark.parametrize( + "paramstyle,query,test_data", + [ + ( + "fb_numeric", + 'INSERT INTO "test_tbl" VALUES ($1, $2)', + [(1, "alice"), (2, "bob"), (3, "charlie")], + ), + ( + "qmark", + 'INSERT INTO "test_tbl" VALUES (?, ?)', + [(4, "david"), (5, "eve"), (6, "frank")], + ), + ], +) +async def test_executemany_bulk_insert_paramstyles( + connection: Connection, + paramstyle: str, + query: str, + test_data: List[Tuple], + create_drop_test_table_setup_teardown_async: Callable, ) -> None: - """executemany with bulk_insert=True inserts data correctly.""" + """executemany with bulk_insert=True works correctly for both paramstyles.""" + # Set the paramstyle for this test + original_paramstyle = firebolt.async_db.paramstyle + firebolt.async_db.paramstyle = paramstyle + # Generate a unique label for this test execution + unique_label = f"test_bulk_insert_async_{paramstyle}_{randint(100000, 999999)}" + table_name = "test_tbl" + try: - firebolt.async_db.paramstyle = "fb_numeric" + c = connection.cursor() - async with connection.cursor() as c: - await c.execute('DROP TABLE IF EXISTS "test_bulk_insert_async"') - await c.execute( - 'CREATE FACT TABLE "test_bulk_insert_async"(id int, name string) primary index id' - ) + # Can't do this for fb_numeric yet - FIR-49970 + if paramstyle != "fb_numeric": + await c.execute(f"SET query_label = '{unique_label}'") - await c.executemany( - 'INSERT INTO "test_bulk_insert_async" VALUES ($1, $2)', - [(1, "alice"), (2, "bob"), (3, "charlie")], - bulk_insert=True, - ) + # Execute bulk insert + await c.executemany( + query, + test_data, + bulk_insert=True, + ) - await c.execute('SELECT * FROM "test_bulk_insert_async" ORDER BY id') - data = await c.fetchall() - assert len(data) == 3 - assert data[0] == [1, "alice"] - assert data[1] == [2, "bob"] - assert data[2] == [3, "charlie"] + # Verify the data was inserted correctly + await c.execute(f'SELECT * FROM "{table_name}" ORDER BY id') + data = await c.fetchall() + assert len(data) == len(test_data) + for i, (expected_id, expected_name) in enumerate(test_data): + assert data[i] == [expected_id, expected_name] + + # Verify that only one INSERT query was executed with our unique label + # Can't do this for fb_numeric yet - FIR-49970 + if paramstyle != "fb_numeric": + # Wait a moment to ensure query history is updated + await trio.sleep(10) + await c.execute( + "SELECT COUNT(*) FROM information_schema.engine_query_history " + f"WHERE query_label = '{unique_label}' AND query_text LIKE 'INSERT INTO%'" + " AND status = 'ENDED_SUCCESSFULLY'" + ) + query_count = (await c.fetchone())[0] + assert ( + query_count == 1 + ), f"Expected 1 INSERT query with label '{unique_label}', but found {query_count}" finally: - async with connection.cursor() as c: - await c.execute('DROP TABLE IF EXISTS "test_bulk_insert_async"') + firebolt.async_db.paramstyle = original_paramstyle async def test_multi_statement_query(connection: Connection) -> None: diff --git a/tests/integration/dbapi/conftest.py b/tests/integration/dbapi/conftest.py index 6ec89708a8..7fc257d10c 100644 --- a/tests/integration/dbapi/conftest.py +++ b/tests/integration/dbapi/conftest.py @@ -12,9 +12,7 @@ LOGGER = getLogger(__name__) -CREATE_TEST_TABLE = ( - 'CREATE DIMENSION TABLE IF NOT EXISTS "test_tbl" (id int, name string)' -) +CREATE_TEST_TABLE = 'CREATE TABLE IF NOT EXISTS "test_tbl" (id int, name string)' DROP_TEST_TABLE = 'DROP TABLE IF EXISTS "test_tbl" CASCADE' LONG_SELECT_DEFAULT_V1 = 250000000000 @@ -59,7 +57,7 @@ def create_drop_test_table_setup_teardown(connection: Connection) -> None: @fixture async def create_drop_test_table_setup_teardown_async(connection: Connection) -> None: - with connection.cursor() as c: + async with connection.cursor() as c: await c.execute(CREATE_TEST_TABLE) yield c await c.execute(DROP_TEST_TABLE) diff --git a/tests/integration/dbapi/sync/V2/test_queries.py b/tests/integration/dbapi/sync/V2/test_queries.py index 577a62ff22..55b99d5340 100644 --- a/tests/integration/dbapi/sync/V2/test_queries.py +++ b/tests/integration/dbapi/sync/V2/test_queries.py @@ -1,4 +1,5 @@ import math +import time from datetime import date, datetime from decimal import Decimal from random import randint @@ -7,6 +8,7 @@ from pytest import mark, raises +import firebolt.db from firebolt.client.auth import Auth from firebolt.common._types import ColType from firebolt.common.row_set.types import Column @@ -283,31 +285,73 @@ def test_empty_query(c: Cursor, query: str, params: tuple) -> None: ) -def test_executemany_bulk_insert( - connection: Connection, fb_numeric_paramstyle: None +@mark.parametrize( + "paramstyle,query,test_data", + [ + ( + "fb_numeric", + 'INSERT INTO "test_tbl" VALUES ($1, $2)', + [(1, "alice"), (2, "bob"), (3, "charlie")], + ), + ( + "qmark", + 'INSERT INTO "test_tbl" VALUES (?, ?)', + [(4, "david"), (5, "eve"), (6, "frank")], + ), + ], +) +def test_executemany_bulk_insert_paramstyles( + connection: Connection, + paramstyle: str, + query: str, + test_data: List[Tuple], + create_drop_test_table_setup_teardown: Callable, ) -> None: - """executemany with bulk_insert=True inserts data correctly.""" + """executemany with bulk_insert=True works correctly for both paramstyles.""" + # Set the paramstyle for this test + original_paramstyle = firebolt.db.paramstyle + firebolt.db.paramstyle = paramstyle + # Generate a unique label for this test execution + unique_label = f"test_bulk_insert_{paramstyle}_{randint(100000, 999999)}" + table_name = "test_tbl" + try: - with connection.cursor() as c: - c.execute('DROP TABLE IF EXISTS "test_bulk_insert"') + c = connection.cursor() + + # Can't do this for fb_numeric yet - FIR-49970 + if paramstyle != "fb_numeric": + c.execute(f"SET query_label = '{unique_label}'") + + # Execute bulk insert + c.executemany( + query, + test_data, + bulk_insert=True, + ) + + # Verify the data was inserted correctly + c.execute(f'SELECT * FROM "{table_name}" ORDER BY id') + data = c.fetchall() + assert len(data) == len(test_data) + for i, (expected_id, expected_name) in enumerate(test_data): + assert data[i] == [expected_id, expected_name] + + # Verify that only one INSERT query was executed with our unique label + # Can't do this for fb_numeric yet - FIR-49970 + if paramstyle != "fb_numeric": + # Wait a moment to ensure query history is updated + time.sleep(10) c.execute( - 'CREATE FACT TABLE "test_bulk_insert"(id int, name string) primary index id' - ) - c.executemany( - 'INSERT INTO "test_bulk_insert" VALUES ($1, $2)', - [(1, "alice"), (2, "bob"), (3, "charlie")], - bulk_insert=True, + "SELECT COUNT(*) FROM information_schema.engine_query_history " + f"WHERE query_label = '{unique_label}' AND query_text LIKE 'INSERT INTO%'" + " AND status = 'ENDED_SUCCESSFULLY'" ) - - c.execute('SELECT * FROM "test_bulk_insert" ORDER BY id') - data = c.fetchall() - assert len(data) == 3 - assert data[0] == [1, "alice"] - assert data[1] == [2, "bob"] - assert data[2] == [3, "charlie"] + query_count = c.fetchone()[0] + assert ( + query_count == 1 + ), f"Expected 1 INSERT query with label '{unique_label}', but found {query_count}" finally: - with connection.cursor() as c: - c.execute('DROP TABLE IF EXISTS "test_bulk_insert"') + firebolt.db.paramstyle = original_paramstyle def test_multi_statement_query(connection: Connection) -> None: From 062def9685e9ab8fadf746d758249ca91ad647a4 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Wed, 15 Oct 2025 13:00:30 +0100 Subject: [PATCH 19/21] add docs --- docsrc/Connecting_and_queries.rst | 47 +++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/docsrc/Connecting_and_queries.rst b/docsrc/Connecting_and_queries.rst index d503183884..c5913e3deb 100644 --- a/docsrc/Connecting_and_queries.rst +++ b/docsrc/Connecting_and_queries.rst @@ -196,14 +196,14 @@ To get started, follow the steps below: ) as connection: # Create a cursor cursor = connection.cursor() - + # Execute a simple test query cursor.execute("SELECT 1") .. note:: - Firebolt Core is assumed to be running locally on the default port (3473). For instructions - on how to run Firebolt Core locally using Docker, refer to the + Firebolt Core is assumed to be running locally on the default port (3473). For instructions + on how to run Firebolt Core locally using Docker, refer to the `official docs `_. @@ -404,7 +404,7 @@ parameters equal in length to the number of placeholders in the statement. "INSERT INTO test_table2 VALUES ($1, $2, $3)", (2, "world", "2018-01-02"), ) - + # paramstyle only needs to be set once, it will be used for all subsequent queries cursor.execute( @@ -444,11 +444,30 @@ For inserting large amounts of data more efficiently, you can use the ``bulk_ins with ``executemany()``. This concatenates multiple INSERT statements into a single batch request, which can significantly improve performance when inserting many rows. -**Note:** The ``bulk_insert`` parameter only works with INSERT statements and requires the -``fb_numeric`` parameter style. Using it with other statement types or parameter styles will +**Note:** The ``bulk_insert`` parameter only works with INSERT statements and supports both +``fb_numeric`` and ``qmark`` parameter styles. Using it with other statement types will raise an error. -Example with FB_NUMERIC parameter style:: +**Example with QMARK parameter style (default):** + +:: + + # Using the default qmark parameter style + cursor.executemany( + "INSERT INTO test_table VALUES (?, ?, ?)", + ( + (1, "apple", "2019-01-01"), + (2, "banana", "2020-01-01"), + (3, "carrot", "2021-01-01"), + (4, "donut", "2022-01-01"), + (5, "eggplant", "2023-01-01") + ), + bulk_insert=True # Enable bulk insert for better performance + ) + +**Example with FB_NUMERIC parameter style:** + +:: import firebolt.db # Set paramstyle to "fb_numeric" for server-side parameter substitution @@ -463,11 +482,9 @@ Example with FB_NUMERIC parameter style:: (4, "donut", "2022-01-01"), (5, "eggplant", "2023-01-01") ), - bulk_insert=True # Enable bulk insert for better performance (important-comment) + bulk_insert=True # Enable bulk insert for better performance ) - cursor.close() - When ``bulk_insert=True``, the SDK concatenates all INSERT statements into a single batch and sends them to the server for optimized batch processing. @@ -766,7 +783,7 @@ of execute_async is -1, which is the rowcount for queries where it's not applica cursor.execute_async("INSERT INTO my_table VALUES (5, 'egg', '2022-01-01')") token = cursor.async_query_token -Trying to access `async_query_token` before calling `execute_async` will raise an exception. +Trying to access `async_query_token` before calling `execute_async` will raise an exception. .. note:: Multiple-statement queries are not supported for asynchronous queries. However, you can run each statement @@ -781,9 +798,9 @@ Monitoring the query status To check the async query status you need to retrieve the token of the query. The token is a unique identifier for the query and can be used to fetch the query status. You can store this token outside of the current process and use it later to check the query status. :ref:`Connection ` object -has two methods to check the query status: :py:meth:`firebolt.db.connection.Connection.is_async_query_running` and -:py:meth:`firebolt.db.connection.Connection.is_async_query_successful`.`is_async_query_running` will return True -if the query is still running, and False otherwise. `is_async_query_successful` will return True if the query +has two methods to check the query status: :py:meth:`firebolt.db.connection.Connection.is_async_query_running` and +:py:meth:`firebolt.db.connection.Connection.is_async_query_successful`.`is_async_query_running` will return True +if the query is still running, and False otherwise. `is_async_query_successful` will return True if the query has finished successfully, None if query is still running and False if the query has failed. :: @@ -814,7 +831,7 @@ will send a cancel request to the server and the query will be stopped. token = cursor.async_query_token connection.cancel_async_query(token) - + # Verify that the query was cancelled running = connection.is_async_query_running(token) print(running) # False From c393dd2999e1b4db8d205a2f74bd57785f81807c Mon Sep 17 00:00:00 2001 From: ptiurin Date: Wed, 15 Oct 2025 16:46:47 +0100 Subject: [PATCH 20/21] rework bulk --- src/firebolt/async_db/cursor.py | 9 +- .../common/cursor/statement_planners.py | 183 ++++++++---------- src/firebolt/common/statement_formatter.py | 24 +++ src/firebolt/db/cursor.py | 9 +- .../common/cursor/test_statement_planners.py | 55 +++--- 5 files changed, 151 insertions(+), 129 deletions(-) diff --git a/src/firebolt/async_db/cursor.py b/src/firebolt/async_db/cursor.py index f4cc2ebc94..cf572107a3 100644 --- a/src/firebolt/async_db/cursor.py +++ b/src/firebolt/async_db/cursor.py @@ -228,11 +228,16 @@ async def _do_execute( try: statement_planner = StatementPlannerFactory.create_planner( - paramstyle, self._formatter, bulk_insert + paramstyle, self._formatter ) plan = statement_planner.create_execution_plan( - raw_query, parameters, skip_parsing, async_execution, streaming + raw_query, + parameters, + skip_parsing, + async_execution, + streaming, + bulk_insert, ) await self._execute_plan(plan, timeout) self._state = CursorState.DONE diff --git a/src/firebolt/common/cursor/statement_planners.py b/src/firebolt/common/cursor/statement_planners.py index b06038295b..246ef5d187 100644 --- a/src/firebolt/common/cursor/statement_planners.py +++ b/src/firebolt/common/cursor/statement_planners.py @@ -33,8 +33,12 @@ class ExecutionPlan: streaming: bool = False -class BulkInsertMixin: - """Mixin class for bulk insert functionality.""" +class BaseStatementPlanner(ABC): + """Base class for statement planning handlers.""" + + def __init__(self, formatter: StatementFormatter) -> None: + """Initialize statement planner with required dependencies.""" + self.formatter = formatter def create_execution_plan( self, @@ -43,11 +47,40 @@ def create_execution_plan( skip_parsing: bool = False, async_execution: bool = False, streaming: bool = False, + bulk_insert: bool = False, ) -> ExecutionPlan: - """Create execution plan for bulk insert operations.""" - return self._create_bulk_execution_plan( - raw_query, parameters, async_execution, streaming - ) + """Create an execution plan for a given statement and parameters. + + This method serves as a factory for creating an execution plan, which + encapsulates the queries to be executed and the parameters for execution. + It supports standard execution, as well as bulk insert, which is handled + by a separate method. + + Args: + raw_query (str): The raw SQL query to be executed. + parameters (Sequence[Sequence[ParameterType]]): A sequence of parameter + sequences for the query. + skip_parsing (bool): If True, the query will not be parsed, and all + special features (e.g., multi-statement, parameterized queries) will + be disabled. Defaults to False. + async_execution (bool): If True, the query will be executed + asynchronously. Defaults to False. + streaming (bool): If True, the query results will be streamed. + Defaults to False. + bulk_insert (bool): If True, the query will be treated as a bulk insert + operation. Defaults to False. + + Returns: + ExecutionPlan: An object representing the execution plan. + """ + if bulk_insert: + return self._create_bulk_execution_plan( + raw_query, parameters, async_execution, streaming + ) + else: + return self._create_standard_execution_plan( + raw_query, parameters, skip_parsing, async_execution, streaming + ) def _validate_bulk_insert_query(self, query: str) -> None: """Validate that query is an INSERT statement for bulk_insert.""" @@ -68,51 +101,36 @@ def _create_bulk_execution_plan( async_execution: bool, streaming: bool, ) -> ExecutionPlan: - """ - Create bulk execution plan by delegating to - parameter-style specific methods. - """ + """Create bulk execution plan using formatter logic.""" # Validate bulk_insert requirements self._validate_bulk_insert_query(raw_query) if not parameters: raise ProgrammingError("bulk_insert requires at least one parameter set") - # Call the parameter-style specific bulk creation method return self._create_bulk_plan_impl( raw_query, parameters, async_execution, streaming ) - def _create_bulk_plan_impl( + @abstractmethod + def _create_standard_execution_plan( self, raw_query: str, parameters: Sequence[Sequence[ParameterType]], + skip_parsing: bool, async_execution: bool, streaming: bool, ) -> ExecutionPlan: - """ - Override in subclasses to provide parameter-style - specific bulk implementation. - """ - raise NotImplementedError("Subclass must implement _create_bulk_plan_impl") - - -class BaseStatementPlanner(ABC): - """Base class for statement planning handlers.""" - - def __init__(self, formatter: StatementFormatter) -> None: - """Initialize statement planner with required dependencies.""" - self.formatter = formatter + """Create standard (non-bulk) execution plan.""" @abstractmethod - def create_execution_plan( + def _create_bulk_plan_impl( self, raw_query: str, parameters: Sequence[Sequence[ParameterType]], - skip_parsing: bool = False, - async_execution: bool = False, - streaming: bool = False, + async_execution: bool, + streaming: bool, ) -> ExecutionPlan: - """Create an execution plan for the given statement and parameters.""" + """Create parameter-style specific bulk execution plan.""" @staticmethod def _get_output_format(streaming: bool) -> str: @@ -125,13 +143,13 @@ def _get_output_format(streaming: bool) -> str: class FbNumericStatementPlanner(BaseStatementPlanner): """Statement planner for fb_numeric parameter style.""" - def create_execution_plan( + def _create_standard_execution_plan( self, raw_query: str, parameters: Sequence[Sequence[ParameterType]], - skip_parsing: bool = False, - async_execution: bool = False, - streaming: bool = False, + skip_parsing: bool, + async_execution: bool, + streaming: bool, ) -> ExecutionPlan: """Create execution plan for fb_numeric parameter style.""" query_params = self._build_fb_numeric_query_params( @@ -146,6 +164,32 @@ def create_execution_plan( streaming=streaming, ) + def _create_bulk_plan_impl( + self, + raw_query: str, + parameters: Sequence[Sequence[ParameterType]], + async_execution: bool, + streaming: bool, + ) -> ExecutionPlan: + """Create bulk insert execution plan for fb_numeric parameter style.""" + # Prepare bulk insert query and parameters for fb_numeric + processed_query, processed_params = self._prepare_fb_numeric_bulk_insert( + raw_query, parameters + ) + + # Build query parameters for bulk insert + query_params = self._build_fb_numeric_query_params( + processed_params, streaming, async_execution + ) + + return ExecutionPlan( + queries=[processed_query], + query_params=query_params, + is_multi_statement=False, + async_execution=async_execution, + streaming=streaming, + ) + def _build_fb_numeric_query_params( self, parameters: Sequence[Sequence[ParameterType]], @@ -174,36 +218,6 @@ def _build_fb_numeric_query_params( query_params.update(extra_params) return query_params - -class FbNumericBulkStatementPlanner(BulkInsertMixin, FbNumericStatementPlanner): - """Statement planner for fb_numeric parameter style with bulk insert support.""" - - def _create_bulk_plan_impl( - self, - raw_query: str, - parameters: Sequence[Sequence[ParameterType]], - async_execution: bool, - streaming: bool, - ) -> ExecutionPlan: - """Create bulk insert execution plan for fb_numeric parameter style.""" - # Prepare bulk insert query and parameters for fb_numeric - processed_query, processed_params = self._prepare_fb_numeric_bulk_insert( - raw_query, parameters - ) - - # Build query parameters for bulk insert - query_params = self._build_fb_numeric_query_params( - processed_params, streaming, async_execution - ) - - return ExecutionPlan( - queries=[processed_query], - query_params=query_params, - is_multi_statement=False, - async_execution=async_execution, - streaming=streaming, - ) - def _prepare_fb_numeric_bulk_insert( self, query: str, parameters_seq: Sequence[Sequence[ParameterType]] ) -> tuple[str, Sequence[Sequence[ParameterType]]]: @@ -232,13 +246,13 @@ def _prepare_fb_numeric_bulk_insert( class QmarkStatementPlanner(BaseStatementPlanner): """Statement planner for qmark parameter style.""" - def create_execution_plan( + def _create_standard_execution_plan( self, raw_query: str, parameters: Sequence[Sequence[ParameterType]], - skip_parsing: bool = False, - async_execution: bool = False, - streaming: bool = False, + skip_parsing: bool, + async_execution: bool, + streaming: bool, ) -> ExecutionPlan: """Create execution plan for qmark parameter style.""" queries: List[Union[SetParameter, str]] = ( @@ -267,10 +281,6 @@ def create_execution_plan( streaming=streaming, ) - -class QmarkBulkStatementPlanner(BulkInsertMixin, QmarkStatementPlanner): - """Statement planner for qmark parameter style with bulk insert support.""" - def _create_bulk_plan_impl( self, raw_query: str, @@ -279,20 +289,8 @@ def _create_bulk_plan_impl( streaming: bool, ) -> ExecutionPlan: """Create bulk insert execution plan for qmark parameter style.""" - # Import needed modules - from sqlparse import parse as parse_sql # type: ignore - - # Prepare bulk insert query for qmark style - statements = parse_sql(raw_query) - if not statements: - raise ProgrammingError("Invalid SQL query for bulk insert") - - formatted_queries = [] - for param_set in parameters: - formatted_query = self.formatter.format_statement(statements[0], param_set) - formatted_queries.append(formatted_query) - - combined_query = "; ".join(formatted_queries) + # Use formatter's bulk insert method to create combined query + combined_query = self.formatter.format_bulk_insert(raw_query, parameters) # Build query parameters for bulk insert query_params: Dict[str, Any] = { @@ -318,21 +316,15 @@ class StatementPlannerFactory: "qmark": QmarkStatementPlanner, } - _BULK_PLANNER_CLASSES = { - "fb_numeric": FbNumericBulkStatementPlanner, - "qmark": QmarkBulkStatementPlanner, - } - @classmethod def create_planner( - cls, paramstyle: str, formatter: StatementFormatter, bulk_insert: bool = False + cls, paramstyle: str, formatter: StatementFormatter ) -> BaseStatementPlanner: """Create a statement planner instance for the given paramstyle. Args: paramstyle: The parameter style ('fb_numeric' or 'qmark') formatter: StatementFormatter instance for statement processing - bulk_insert: Whether to create a bulk-capable planner Returns: Appropriate statement planner instance @@ -340,10 +332,7 @@ def create_planner( Raises: ProgrammingError: If paramstyle is not supported """ - planner_classes = ( - cls._BULK_PLANNER_CLASSES if bulk_insert else cls._PLANNER_CLASSES - ) - planner_class = planner_classes.get(paramstyle) + planner_class = cls._PLANNER_CLASSES.get(paramstyle) if planner_class is None: raise ProgrammingError(f"Unsupported paramstyle: {paramstyle}") diff --git a/src/firebolt/common/statement_formatter.py b/src/firebolt/common/statement_formatter.py index 27a79a031c..b43d7e8d8d 100644 --- a/src/firebolt/common/statement_formatter.py +++ b/src/firebolt/common/statement_formatter.py @@ -218,6 +218,30 @@ def split_format_sql( self.statement_to_set(st) or self.statement_to_sql(st) for st in statements ] + def format_bulk_insert( + self, query: str, parameters_seq: Sequence[Sequence[ParameterType]] + ) -> str: + """ + Format bulk insert operations by creating multiple INSERT statements. + + Args: + query: The base INSERT query template + parameters_seq: Sequence of parameter sets for each INSERT + + Returns: + Combined SQL string with all INSERT statements + """ + statements = parse_sql(query) + if not statements: + raise DataError("Invalid SQL query for bulk insert") + + formatted_queries = [] + for param_set in parameters_seq: + formatted_query = self.format_statement(statements[0], param_set) + formatted_queries.append(formatted_query) + + return "; ".join(formatted_queries) + def create_statement_formatter(version: int) -> StatementFormatter: if version == 1: diff --git a/src/firebolt/db/cursor.py b/src/firebolt/db/cursor.py index d14a077051..b0c67dcedb 100644 --- a/src/firebolt/db/cursor.py +++ b/src/firebolt/db/cursor.py @@ -234,11 +234,16 @@ def _do_execute( try: statement_planner = StatementPlannerFactory.create_planner( - paramstyle, self._formatter, bulk_insert + paramstyle, self._formatter ) plan = statement_planner.create_execution_plan( - raw_query, parameters, skip_parsing, async_execution, streaming + raw_query, + parameters, + skip_parsing, + async_execution, + streaming, + bulk_insert, ) self._execute_plan(plan, timeout) self._state = CursorState.DONE diff --git a/tests/unit/common/cursor/test_statement_planners.py b/tests/unit/common/cursor/test_statement_planners.py index 630ac58575..5cf758becf 100644 --- a/tests/unit/common/cursor/test_statement_planners.py +++ b/tests/unit/common/cursor/test_statement_planners.py @@ -12,7 +12,6 @@ ) from firebolt.common.cursor.statement_planners import ( BaseStatementPlanner, - BulkInsertMixin, ExecutionPlan, FbNumericStatementPlanner, QmarkStatementPlanner, @@ -84,7 +83,9 @@ def test_fb_numeric_planner_initialization(formatter): def test_fb_numeric_basic_execution_plan(fb_numeric_planner): """Test basic execution plan creation.""" - plan = fb_numeric_planner.create_execution_plan("SELECT $1", [[42]]) + plan = fb_numeric_planner.create_execution_plan( + "SELECT $1", [[42]], bulk_insert=False + ) assert len(plan.queries) == 1 assert plan.queries[0] == "SELECT $1" @@ -97,7 +98,9 @@ def test_fb_numeric_basic_execution_plan(fb_numeric_planner): def test_fb_numeric_execution_plan_with_parameters(fb_numeric_planner): """Test execution plan with parameters.""" parameters = [[42, "test", True]] - plan = fb_numeric_planner.create_execution_plan("SELECT $1, $2, $3", parameters) + plan = fb_numeric_planner.create_execution_plan( + "SELECT $1, $2, $3", parameters, bulk_insert=False + ) assert plan.query_params is not None assert "query_parameters" in plan.query_params @@ -112,7 +115,7 @@ def test_fb_numeric_execution_plan_with_parameters(fb_numeric_planner): def test_fb_numeric_execution_plan_no_parameters(fb_numeric_planner): """Test execution plan without parameters.""" - plan = fb_numeric_planner.create_execution_plan("SELECT 1", []) + plan = fb_numeric_planner.create_execution_plan("SELECT 1", [], bulk_insert=False) assert plan.query_params is not None assert "query_parameters" not in plan.query_params @@ -186,7 +189,7 @@ def test_qmark_planner_initialization(formatter): def test_qmark_basic_execution_plan(qmark_planner): """Test basic execution plan creation.""" - plan = qmark_planner.create_execution_plan("SELECT ?", [[42]]) + plan = qmark_planner.create_execution_plan("SELECT ?", [[42]], bulk_insert=False) assert len(plan.queries) >= 1 # Could be split by formatter assert plan.query_params is not None @@ -258,7 +261,9 @@ def test_qmark_multi_statement_detection(qmark_planner, queries, expected_multi) # Mock the formatter to return queries qmark_planner.formatter.split_format_sql = Mock(return_value=queries) - plan = qmark_planner.create_execution_plan("SELECT 1; SELECT 2", [[]]) + plan = qmark_planner.create_execution_plan( + "SELECT 1; SELECT 2", [[]], bulk_insert=False + ) assert plan.is_multi_statement is expected_multi assert len(plan.queries) == len(queries) @@ -308,43 +313,35 @@ def test_statement_planner_factory_unsupported_paramstyle(formatter): @pytest.mark.parametrize( "paramstyle,expected_class", [ - ("fb_numeric", "FbNumericBulkStatementPlanner"), - ("qmark", "QmarkBulkStatementPlanner"), + ("fb_numeric", "FbNumericStatementPlanner"), + ("qmark", "QmarkStatementPlanner"), ], ) def test_statement_planner_factory_creates_correct_bulk_planners( formatter, paramstyle, expected_class ): - """Test that factory creates correct bulk planner types.""" - planner = StatementPlannerFactory.create_planner( - paramstyle, formatter, bulk_insert=True - ) + """Test that factory creates unified planner types that support both standard and bulk operations.""" + planner = StatementPlannerFactory.create_planner(paramstyle, formatter) assert planner.__class__.__name__ == expected_class assert planner.formatter == formatter -def test_statement_planner_factory_bulk_planner_inheritance(formatter): - """Test that bulk planners inherit from regular planners.""" - fb_bulk_planner = StatementPlannerFactory.create_planner( - "fb_numeric", formatter, bulk_insert=True - ) - qmark_bulk_planner = StatementPlannerFactory.create_planner( - "qmark", formatter, bulk_insert=True - ) +def test_statement_planner_factory_unified_planners(formatter): + """Test that planners now support both standard and bulk operations.""" + fb_planner = StatementPlannerFactory.create_planner("fb_numeric", formatter) + qmark_planner = StatementPlannerFactory.create_planner("qmark", formatter) - # Check that bulk planners inherit from their regular counterparts and the mixin - assert isinstance(fb_bulk_planner, FbNumericStatementPlanner) - assert isinstance(fb_bulk_planner, BulkInsertMixin) - assert isinstance(qmark_bulk_planner, QmarkStatementPlanner) - assert isinstance(qmark_bulk_planner, BulkInsertMixin) + # Check that planners are the same whether bulk_insert is True or False + assert isinstance(fb_planner, FbNumericStatementPlanner) + assert isinstance(qmark_planner, QmarkStatementPlanner) # Edge cases and error conditions def test_fb_numeric_empty_parameters_list(formatter): """Test fb_numeric with empty parameters list.""" planner = FbNumericStatementPlanner(formatter) - plan = planner.create_execution_plan("SELECT 1", []) + plan = planner.create_execution_plan("SELECT 1", [], bulk_insert=False) assert "query_parameters" not in plan.query_params assert plan.query_params["output_format"] == JSON_OUTPUT_FORMAT @@ -353,7 +350,9 @@ def test_fb_numeric_empty_parameters_list(formatter): def test_fb_numeric_none_parameters(formatter): """Test fb_numeric with None in parameters.""" planner = FbNumericStatementPlanner(formatter) - plan = planner.create_execution_plan("SELECT $1, $2", [[None, "test"]]) + plan = planner.create_execution_plan( + "SELECT $1, $2", [[None, "test"]], bulk_insert=False + ) query_params = json.loads(plan.query_params["query_parameters"]) assert query_params[0]["value"] is None @@ -363,7 +362,7 @@ def test_fb_numeric_none_parameters(formatter): def test_qmark_empty_query(formatter): """Test qmark with empty query.""" planner = QmarkStatementPlanner(formatter) - plan = planner.create_execution_plan("", [[]]) + plan = planner.create_execution_plan("", [[]], bulk_insert=False) assert plan.query_params is not None assert "output_format" in plan.query_params From 07340400f37b3054dfa66507cb0a1345b5baa928 Mon Sep 17 00:00:00 2001 From: ptiurin Date: Thu, 16 Oct 2025 16:12:01 +0100 Subject: [PATCH 21/21] remove streaming option for bulk --- .../common/cursor/statement_planners.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/firebolt/common/cursor/statement_planners.py b/src/firebolt/common/cursor/statement_planners.py index 246ef5d187..162dacf985 100644 --- a/src/firebolt/common/cursor/statement_planners.py +++ b/src/firebolt/common/cursor/statement_planners.py @@ -75,7 +75,7 @@ def create_execution_plan( """ if bulk_insert: return self._create_bulk_execution_plan( - raw_query, parameters, async_execution, streaming + raw_query, parameters, async_execution ) else: return self._create_standard_execution_plan( @@ -99,7 +99,6 @@ def _create_bulk_execution_plan( raw_query: str, parameters: Sequence[Sequence[ParameterType]], async_execution: bool, - streaming: bool, ) -> ExecutionPlan: """Create bulk execution plan using formatter logic.""" # Validate bulk_insert requirements @@ -107,9 +106,7 @@ def _create_bulk_execution_plan( if not parameters: raise ProgrammingError("bulk_insert requires at least one parameter set") - return self._create_bulk_plan_impl( - raw_query, parameters, async_execution, streaming - ) + return self._create_bulk_plan_impl(raw_query, parameters, async_execution) @abstractmethod def _create_standard_execution_plan( @@ -128,7 +125,6 @@ def _create_bulk_plan_impl( raw_query: str, parameters: Sequence[Sequence[ParameterType]], async_execution: bool, - streaming: bool, ) -> ExecutionPlan: """Create parameter-style specific bulk execution plan.""" @@ -169,7 +165,6 @@ def _create_bulk_plan_impl( raw_query: str, parameters: Sequence[Sequence[ParameterType]], async_execution: bool, - streaming: bool, ) -> ExecutionPlan: """Create bulk insert execution plan for fb_numeric parameter style.""" # Prepare bulk insert query and parameters for fb_numeric @@ -179,7 +174,7 @@ def _create_bulk_plan_impl( # Build query parameters for bulk insert query_params = self._build_fb_numeric_query_params( - processed_params, streaming, async_execution + processed_params, streaming=False, async_execution=async_execution ) return ExecutionPlan( @@ -187,7 +182,7 @@ def _create_bulk_plan_impl( query_params=query_params, is_multi_statement=False, async_execution=async_execution, - streaming=streaming, + streaming=False, ) def _build_fb_numeric_query_params( @@ -286,7 +281,6 @@ def _create_bulk_plan_impl( raw_query: str, parameters: Sequence[Sequence[ParameterType]], async_execution: bool, - streaming: bool, ) -> ExecutionPlan: """Create bulk insert execution plan for qmark parameter style.""" # Use formatter's bulk insert method to create combined query @@ -294,7 +288,7 @@ def _create_bulk_plan_impl( # Build query parameters for bulk insert query_params: Dict[str, Any] = { - "output_format": self._get_output_format(streaming), + "output_format": self._get_output_format(False), } if async_execution: query_params["async"] = True @@ -304,7 +298,7 @@ def _create_bulk_plan_impl( query_params=query_params, is_multi_statement=False, async_execution=async_execution, - streaming=streaming, + streaming=False, )