diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index 04135574a9..5fca61d7ed 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -304,6 +304,8 @@ workflows: - bigquery - clickhouse-cloud - athena + # todo: enable fabric when cicd catalog create/drop implemented in manage-test-db.sh + #- fabric - gcp-postgres filters: branches: diff --git a/.circleci/install-prerequisites.sh b/.circleci/install-prerequisites.sh index 1eebd92c71..acd25ae02c 100755 --- a/.circleci/install-prerequisites.sh +++ b/.circleci/install-prerequisites.sh @@ -12,7 +12,7 @@ fi ENGINE="$1" -COMMON_DEPENDENCIES="libpq-dev netcat-traditional" +COMMON_DEPENDENCIES="libpq-dev netcat-traditional unixodbc-dev" ENGINE_DEPENDENCIES="" if [ "$ENGINE" == "spark" ]; then diff --git a/Makefile b/Makefile index 855d866c84..3fea757169 100644 --- a/Makefile +++ b/Makefile @@ -173,6 +173,9 @@ clickhouse-cloud-test: guard-CLICKHOUSE_CLOUD_HOST guard-CLICKHOUSE_CLOUD_USERNA athena-test: guard-AWS_ACCESS_KEY_ID guard-AWS_SECRET_ACCESS_KEY guard-ATHENA_S3_WAREHOUSE_LOCATION engine-athena-install pytest -n auto -m "athena" --retries 3 --junitxml=test-results/junit-athena.xml +fabric-test: guard-FABRIC_HOST guard-FABRIC_CLIENT_ID guard-FABRIC_CLIENT_SECRET guard-FABRIC_DATABASE engine-fabric-install + pytest -n auto -m "fabric" --retries 3 --junitxml=test-results/junit-fabric.xml + gcp-postgres-test: guard-GCP_POSTGRES_INSTANCE_CONNECTION_STRING guard-GCP_POSTGRES_USER guard-GCP_POSTGRES_PASSWORD guard-GCP_POSTGRES_KEYFILE_JSON engine-gcppostgres-install pytest -n auto -m "gcp_postgres" --retries 3 --junitxml=test-results/junit-gcp-postgres.xml diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index b137546d84..d2e294a589 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -909,6 +909,7 @@ These pages describe the connection configuration options for each execution eng * [BigQuery](../integrations/engines/bigquery.md) * [Databricks](../integrations/engines/databricks.md) * [DuckDB](../integrations/engines/duckdb.md) +* [Fabric](../integrations/engines/fabric.md) * [MotherDuck](../integrations/engines/motherduck.md) * [MySQL](../integrations/engines/mysql.md) * [MSSQL](../integrations/engines/mssql.md) diff --git a/docs/integrations/engines/fabric.md b/docs/integrations/engines/fabric.md new file mode 100644 index 0000000000..eb00b5ac1d --- /dev/null +++ b/docs/integrations/engines/fabric.md @@ -0,0 +1,34 @@ +# Fabric + +## Local/Built-in Scheduler +**Engine Adapter Type**: `fabric` + +NOTE: Fabric Warehouse is not recommended to be used for the SQLMesh [state connection](../../reference/configuration.md#connections). + +### Installation +#### Microsoft Entra ID / Azure Active Directory Authentication: +``` +pip install "sqlmesh[fabric]" +``` + +### Connection options + +| Option | Description | Type | Required | +| ----------------- | ------------------------------------------------------------ | :----------: | :------: | +| `type` | Engine type name - must be `fabric` | string | Y | +| `host` | The hostname of the Fabric Warehouse server | string | Y | +| `user` | The client id to use for authentication with the Fabric Warehouse server | string | N | +| `password` | The client secret to use for authentication with the Fabric Warehouse server | string | N | +| `port` | The port number of the Fabric Warehouse server | int | N | +| `database` | The target database | string | N | +| `charset` | The character set used for the connection | string | N | +| `timeout` | The query timeout in seconds. Default: no timeout | int | N | +| `login_timeout` | The timeout for connection and login in seconds. Default: 60 | int | N | +| `appname` | The application name to use for the connection | string | N | +| `conn_properties` | The list of connection properties | list[string] | N | +| `autocommit` | Is autocommit mode enabled. Default: false | bool | N | +| `driver` | The driver to use for the connection. Default: pyodbc | string | N | +| `driver_name` | The driver name to use for the connection. E.g., *ODBC Driver 18 for SQL Server* | string | N | +| `tenant_id` | The Azure / Entra tenant UUID | string | Y | +| `workspace_id` | The Fabric workspace UUID. The preferred way to retrieve it is by running `notebookutils.runtime.context.get("currentWorkspaceId")` in a python notebook. | string | Y | +| `odbc_properties` | The dict of ODBC connection properties. E.g., authentication: ActiveDirectoryServicePrincipal. See more [here](https://learn.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver16). | dict | N | diff --git a/docs/integrations/overview.md b/docs/integrations/overview.md index 5e850afbf6..94b9289d21 100644 --- a/docs/integrations/overview.md +++ b/docs/integrations/overview.md @@ -17,6 +17,7 @@ SQLMesh supports the following execution engines for running SQLMesh projects (e * [ClickHouse](./engines/clickhouse.md) (clickhouse) * [Databricks](./engines/databricks.md) (databricks) * [DuckDB](./engines/duckdb.md) (duckdb) +* [Fabric](./engines/fabric.md) (fabric) * [MotherDuck](./engines/motherduck.md) (motherduck) * [MSSQL](./engines/mssql.md) (mssql) * [MySQL](./engines/mysql.md) (mysql) diff --git a/mkdocs.yml b/mkdocs.yml index 34156b1b66..47ddca54e9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -83,6 +83,7 @@ nav: - integrations/engines/clickhouse.md - integrations/engines/databricks.md - integrations/engines/duckdb.md + - integrations/engines/fabric.md - integrations/engines/motherduck.md - integrations/engines/mssql.md - integrations/engines/mysql.md diff --git a/pyproject.toml b/pyproject.toml index 12c855f6f1..4e201a734d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,6 +103,7 @@ dev = [ dbt = ["dbt-core<2"] dlt = ["dlt"] duckdb = [] +fabric = ["pyodbc>=5.0.0"] gcppostgres = ["cloud-sql-python-connector[pg8000]>=1.8.0"] github = ["PyGithub>=2.6.0"] llm = ["langchain", "openai"] @@ -253,6 +254,7 @@ markers = [ "clickhouse_cloud: test for Clickhouse (cloud mode)", "databricks: test for Databricks", "duckdb: test for DuckDB", + "fabric: test for Fabric", "motherduck: test for MotherDuck", "mssql: test for MSSQL", "mysql: test for MySQL", diff --git a/sqlmesh/core/config/__init__.py b/sqlmesh/core/config/__init__.py index d8c7607d51..0dc99c0fd1 100644 --- a/sqlmesh/core/config/__init__.py +++ b/sqlmesh/core/config/__init__.py @@ -13,6 +13,7 @@ ConnectionConfig as ConnectionConfig, DatabricksConnectionConfig as DatabricksConnectionConfig, DuckDBConnectionConfig as DuckDBConnectionConfig, + FabricConnectionConfig as FabricConnectionConfig, GCPPostgresConnectionConfig as GCPPostgresConnectionConfig, MotherDuckConnectionConfig as MotherDuckConnectionConfig, MSSQLConnectionConfig as MSSQLConnectionConfig, diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 4f289262ea..b530af36da 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -43,7 +43,13 @@ logger = logging.getLogger(__name__) -RECOMMENDED_STATE_SYNC_ENGINES = {"postgres", "gcp_postgres", "mysql", "mssql", "azuresql"} +RECOMMENDED_STATE_SYNC_ENGINES = { + "postgres", + "gcp_postgres", + "mysql", + "mssql", + "azuresql", +} FORBIDDEN_STATE_SYNC_ENGINES = { # Do not support row-level operations "spark", @@ -1684,6 +1690,55 @@ def _extra_engine_config(self) -> t.Dict[str, t.Any]: return {"catalog_support": CatalogSupport.SINGLE_CATALOG_ONLY} +class FabricConnectionConfig(MSSQLConnectionConfig): + """ + Fabric Connection Configuration. + Inherits most settings from MSSQLConnectionConfig and sets the type to 'fabric'. + It is recommended to use the 'pyodbc' driver for Fabric. + """ + + type_: t.Literal["fabric"] = Field(alias="type", default="fabric") # type: ignore + DIALECT: t.ClassVar[t.Literal["fabric"]] = "fabric" # type: ignore + DISPLAY_NAME: t.ClassVar[t.Literal["Fabric"]] = "Fabric" # type: ignore + DISPLAY_ORDER: t.ClassVar[t.Literal[17]] = 17 # type: ignore + driver: t.Literal["pyodbc"] = "pyodbc" + workspace_id: str + tenant_id: str + autocommit: t.Optional[bool] = True + + @property + def _engine_adapter(self) -> t.Type[EngineAdapter]: + from sqlmesh.core.engine_adapter.fabric import FabricEngineAdapter + + return FabricEngineAdapter + + @property + def _connection_factory(self) -> t.Callable: + # Override to support catalog switching for Fabric + base_factory = super()._connection_factory + + def create_fabric_connection( + target_catalog: t.Optional[str] = None, *args: t.Any, **kwargs: t.Any + ) -> t.Callable: + kwargs["database"] = target_catalog or self.database + return base_factory(*args, **kwargs) + + return create_fabric_connection + + @property + def _extra_engine_config(self) -> t.Dict[str, t.Any]: + return { + "database": self.database, + # more operations than not require a specific catalog to be already active + # in particular, create/drop view, create/drop schema and querying information_schema + "catalog_support": CatalogSupport.REQUIRES_SET_CATALOG, + "workspace_id": self.workspace_id, + "tenant_id": self.tenant_id, + "user": self.user, + "password": self.password, + } + + class SparkConnectionConfig(ConnectionConfig): """ Vanilla Spark Connection Configuration. Use `DatabricksConnectionConfig` for Databricks. diff --git a/sqlmesh/core/engine_adapter/__init__.py b/sqlmesh/core/engine_adapter/__init__.py index 19332dc005..ab29885c7b 100644 --- a/sqlmesh/core/engine_adapter/__init__.py +++ b/sqlmesh/core/engine_adapter/__init__.py @@ -19,6 +19,7 @@ from sqlmesh.core.engine_adapter.trino import TrinoEngineAdapter from sqlmesh.core.engine_adapter.athena import AthenaEngineAdapter from sqlmesh.core.engine_adapter.risingwave import RisingwaveEngineAdapter +from sqlmesh.core.engine_adapter.fabric import FabricEngineAdapter DIALECT_TO_ENGINE_ADAPTER = { "hive": SparkEngineAdapter, @@ -35,6 +36,7 @@ "trino": TrinoEngineAdapter, "athena": AthenaEngineAdapter, "risingwave": RisingwaveEngineAdapter, + "fabric": FabricEngineAdapter, } DIALECT_ALIASES = { diff --git a/sqlmesh/core/engine_adapter/fabric.py b/sqlmesh/core/engine_adapter/fabric.py new file mode 100644 index 0000000000..6f0123d022 --- /dev/null +++ b/sqlmesh/core/engine_adapter/fabric.py @@ -0,0 +1,291 @@ +from __future__ import annotations + +import typing as t +import logging +import requests +from functools import cached_property +from sqlglot import exp +from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_result +from sqlmesh.core.engine_adapter.mssql import MSSQLEngineAdapter +from sqlmesh.core.engine_adapter.shared import ( + InsertOverwriteStrategy, + SourceQuery, +) +from sqlmesh.core.engine_adapter.base import EngineAdapter +from sqlmesh.utils.errors import SQLMeshError +from sqlmesh.utils.connection_pool import ConnectionPool + +if t.TYPE_CHECKING: + from sqlmesh.core._typing import TableName + + +from sqlmesh.core.engine_adapter.mixins import LogicalMergeMixin + +logger = logging.getLogger(__name__) + + +class FabricEngineAdapter(LogicalMergeMixin, MSSQLEngineAdapter): + """ + Adapter for Microsoft Fabric. + """ + + DIALECT = "fabric" + SUPPORTS_INDEXES = False + SUPPORTS_TRANSACTIONS = False + SUPPORTS_CREATE_DROP_CATALOG = True + INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT + + def __init__( + self, connection_factory_or_pool: t.Union[t.Callable, t.Any], *args: t.Any, **kwargs: t.Any + ) -> None: + # Wrap connection factory to support changing the catalog dynamically at runtime + if not isinstance(connection_factory_or_pool, ConnectionPool): + original_connection_factory = connection_factory_or_pool + + connection_factory_or_pool = lambda *args, **kwargs: original_connection_factory( + target_catalog=self._target_catalog, *args, **kwargs + ) + + super().__init__(connection_factory_or_pool, *args, **kwargs) + + @property + def _target_catalog(self) -> t.Optional[str]: + return self._connection_pool.get_attribute("target_catalog") + + @_target_catalog.setter + def _target_catalog(self, value: t.Optional[str]) -> None: + self._connection_pool.set_attribute("target_catalog", value) + + def _insert_overwrite_by_condition( + self, + table_name: TableName, + source_queries: t.List[SourceQuery], + target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, + where: t.Optional[exp.Condition] = None, + insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None, + **kwargs: t.Any, + ) -> None: + # Override to avoid MERGE statement which isn't fully supported in Fabric + return EngineAdapter._insert_overwrite_by_condition( + self, + table_name=table_name, + source_queries=source_queries, + target_columns_to_types=target_columns_to_types, + where=where, + insert_overwrite_strategy_override=InsertOverwriteStrategy.DELETE_INSERT, + **kwargs, + ) + + @property + def api_client(self) -> FabricHttpClient: + # the requests Session is not guaranteed to be threadsafe + # so we create a http client per thread on demand + if existing_client := self._connection_pool.get_attribute("api_client"): + return existing_client + + tenant_id: t.Optional[str] = self._extra_config.get("tenant_id") + workspace_id: t.Optional[str] = self._extra_config.get("workspace_id") + client_id: t.Optional[str] = self._extra_config.get("user") + client_secret: t.Optional[str] = self._extra_config.get("password") + + if not tenant_id or not client_id or not client_secret: + raise SQLMeshError( + "Service Principal authentication requires tenant_id, client_id, and client_secret " + "in the Fabric connection configuration" + ) + + if not workspace_id: + raise SQLMeshError( + "Fabric requires the workspace_id to be configured in the connection configuration to create / drop catalogs" + ) + + client = FabricHttpClient( + tenant_id=tenant_id, + workspace_id=workspace_id, + client_id=client_id, + client_secret=client_secret, + ) + + self._connection_pool.set_attribute("api_client", client) + return client + + def _create_catalog(self, catalog_name: exp.Identifier) -> None: + """Create a catalog (warehouse) in Microsoft Fabric via REST API.""" + warehouse_name = catalog_name.sql(dialect=self.dialect, identify=False) + logger.info(f"Creating Fabric warehouse: {warehouse_name}") + + self.api_client.create_warehouse(warehouse_name) + + def _drop_catalog(self, catalog_name: exp.Identifier) -> None: + """Drop a catalog (warehouse) in Microsoft Fabric via REST API.""" + warehouse_name = catalog_name.sql(dialect=self.dialect, identify=False) + + logger.info(f"Deleting Fabric warehouse: {warehouse_name}") + self.api_client.delete_warehouse(warehouse_name) + + def set_current_catalog(self, catalog_name: str) -> None: + """ + Set the current catalog for Microsoft Fabric connections. + + Override to handle Fabric's stateless session limitation where USE statements + don't persist across queries. Instead, we close existing connections and + recreate them with the new catalog in the connection configuration. + + Args: + catalog_name: The name of the catalog (warehouse) to switch to + + Note: + Fabric doesn't support catalog switching via USE statements because each + statement runs as an independent session. This method works around this + limitation by updating the connection pool with new catalog configuration. + + See: + https://learn.microsoft.com/en-us/fabric/data-warehouse/sql-query-editor#limitations + """ + current_catalog = self.get_current_catalog() + + # If already using the requested catalog, do nothing + if current_catalog and current_catalog == catalog_name: + logger.debug(f"Already using catalog '{catalog_name}', no action needed") + return + + logger.info(f"Switching from catalog '{current_catalog}' to '{catalog_name}'") + + # note: we call close() on the connection pool instead of self.close() because self.close() calls close_all() + # on the connection pool but we just want to close the connection for this thread + self._connection_pool.close() + self._target_catalog = catalog_name # new connections will use this catalog + + catalog_after_switch = self.get_current_catalog() + + if catalog_after_switch != catalog_name: + # We need to raise an error if the catalog switch failed to prevent the operation that needed the catalog switch from being run against the wrong catalog + raise SQLMeshError( + f"Unable to switch catalog to {catalog_name}, catalog ended up as {catalog_after_switch}" + ) + + +class FabricHttpClient: + def __init__(self, tenant_id: str, workspace_id: str, client_id: str, client_secret: str): + self.tenant_id = tenant_id + self.client_id = client_id + self.client_secret = client_secret + self.workspace_id = workspace_id + + def create_warehouse(self, warehouse_name: str) -> None: + """Create a catalog (warehouse) in Microsoft Fabric via REST API.""" + logger.info(f"Creating Fabric warehouse: {warehouse_name}") + + request_data = { + "displayName": warehouse_name, + "description": f"Warehouse created by SQLMesh: {warehouse_name}", + } + + response = self.session.post(self._endpoint_url("warehouses"), json=request_data) + response.raise_for_status() + + # Handle direct success (201) or async creation (202) + if response.status_code == 201: + logger.info(f"Successfully created Fabric warehouse: {warehouse_name}") + return + + if response.status_code == 202 and (location_header := response.headers.get("location")): + logger.info(f"Warehouse creation initiated for: {warehouse_name}") + self._wait_for_completion(location_header, warehouse_name) + logger.info(f"Successfully created Fabric warehouse: {warehouse_name}") + else: + logger.error(f"Unexpected response from Fabric API: {response}\n{response.text}") + raise SQLMeshError(f"Unable to create warehouse: {response}") + + def delete_warehouse(self, warehouse_name: str) -> None: + """Drop a catalog (warehouse) in Microsoft Fabric via REST API.""" + logger.info(f"Deleting Fabric warehouse: {warehouse_name}") + + # Get the warehouse ID by listing warehouses + response = self.session.get(self._endpoint_url("warehouses")) + response.raise_for_status() + + warehouse_name_to_id = { + warehouse.get("displayName"): warehouse.get("id") + for warehouse in response.json().get("value", []) + } + + warehouse_id = warehouse_name_to_id.get(warehouse_name, None) + + if not warehouse_id: + logger.error( + f"Fabric warehouse does not exist: {warehouse_name}\n(available warehouses: {', '.join(warehouse_name_to_id)})" + ) + raise SQLMeshError( + f"Unable to delete Fabric warehouse {warehouse_name} as it doesnt exist" + ) + + # Delete the warehouse by ID + response = self.session.delete(self._endpoint_url(f"warehouses/{warehouse_id}")) + response.raise_for_status() + + logger.info(f"Successfully deleted Fabric warehouse: {warehouse_name}") + + @cached_property + def session(self) -> requests.Session: + s = requests.Session() + + access_token = self._get_access_token() + s.headers.update({"Authorization": f"Bearer {access_token}"}) + + return s + + def _endpoint_url(self, endpoint: str) -> str: + if endpoint.startswith("/"): + endpoint = endpoint[1:] + + return f"https://api.fabric.microsoft.com/v1/workspaces/{self.workspace_id}/{endpoint}" + + def _get_access_token(self) -> str: + """Get access token using Service Principal authentication.""" + + # Use Azure AD OAuth2 token endpoint + token_url = f"https://login.microsoftonline.com/{self.tenant_id}/oauth2/v2.0/token" + + data = { + "grant_type": "client_credentials", + "client_id": self.client_id, + "client_secret": self.client_secret, + "scope": "https://api.fabric.microsoft.com/.default", + } + + response = requests.post(token_url, data=data) + response.raise_for_status() + token_data = response.json() + return token_data["access_token"] + + def _wait_for_completion(self, location_url: str, operation_name: str) -> None: + """Poll the operation status until completion.""" + + @retry( + wait=wait_exponential(multiplier=1, min=1, max=30), + stop=stop_after_attempt(20), + retry=retry_if_result(lambda result: result not in ["Succeeded", "Failed"]), + ) + def _poll() -> str: + response = self.session.get(location_url) + response.raise_for_status() + + result = response.json() + status = result.get("status", "Unknown") + + logger.debug(f"Operation {operation_name} status: {status}") + + if status == "Failed": + error_msg = result.get("error", {}).get("message", "Unknown error") + raise SQLMeshError(f"Operation {operation_name} failed: {error_msg}") + elif status in ["InProgress", "Running"]: + logger.debug(f"Operation {operation_name} still in progress...") + elif status not in ["Succeeded"]: + logger.warning(f"Unknown status '{status}' for operation {operation_name}") + + return status + + final_status = _poll() + if final_status != "Succeeded": + raise SQLMeshError(f"Operation {operation_name} completed with status: {final_status}") diff --git a/tests/conftest.py b/tests/conftest.py index ad09deff6f..01fef852f7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -478,7 +478,7 @@ def _make_function( connection_mock.cursor.return_value = cursor_mock cursor_mock.connection.return_value = connection_mock adapter = klass( - lambda: connection_mock, + lambda *args, **kwargs: connection_mock, dialect=dialect or klass.DIALECT, register_comments=register_comments, default_catalog=default_catalog, diff --git a/tests/core/engine_adapter/integration/__init__.py b/tests/core/engine_adapter/integration/__init__.py index 50437338ae..8476d992eb 100644 --- a/tests/core/engine_adapter/integration/__init__.py +++ b/tests/core/engine_adapter/integration/__init__.py @@ -82,6 +82,7 @@ def pytest_marks(self) -> t.List[MarkDecorator]: IntegrationTestEngine("bigquery", native_dataframe_type="bigframe", cloud=True), IntegrationTestEngine("databricks", native_dataframe_type="pyspark", cloud=True), IntegrationTestEngine("snowflake", native_dataframe_type="snowpark", cloud=True), + IntegrationTestEngine("fabric", cloud=True), IntegrationTestEngine("gcp_postgres", cloud=True), ] @@ -680,6 +681,9 @@ def create_catalog(self, catalog_name: str): except Exception: pass self.engine_adapter.cursor.connection.autocommit(False) + elif self.dialect == "fabric": + # Use the engine adapter's built-in catalog creation functionality + self.engine_adapter.create_catalog(catalog_name) elif self.dialect == "snowflake": self.engine_adapter.execute(f'CREATE DATABASE IF NOT EXISTS "{catalog_name}"') elif self.dialect == "duckdb": @@ -696,6 +700,9 @@ def drop_catalog(self, catalog_name: str): return # bigquery cannot create/drop catalogs if self.dialect == "databricks": self.engine_adapter.execute(f"DROP CATALOG IF EXISTS {catalog_name} CASCADE") + elif self.dialect == "fabric": + # Use the engine adapter's built-in catalog dropping functionality + self.engine_adapter.drop_catalog(catalog_name) else: self.engine_adapter.execute(f'DROP DATABASE IF EXISTS "{catalog_name}"') diff --git a/tests/core/engine_adapter/integration/config.yaml b/tests/core/engine_adapter/integration/config.yaml index 4aee4640a3..51241889de 100644 --- a/tests/core/engine_adapter/integration/config.yaml +++ b/tests/core/engine_adapter/integration/config.yaml @@ -186,6 +186,21 @@ gateways: state_connection: type: duckdb + inttest_fabric: + connection: + type: fabric + driver: pyodbc + host: {{ env_var("FABRIC_HOST") }} + user: {{ env_var("FABRIC_CLIENT_ID") }} + password: {{ env_var("FABRIC_CLIENT_SECRET") }} + database: {{ env_var("FABRIC_DATABASE") }} + tenant_id: {{ env_var("FABRIC_TENANT_ID") }} + workspace_id: {{ env_var("FABRIC_WORKSPACE_ID") }} + odbc_properties: + Authentication: ActiveDirectoryServicePrincipal + state_connection: + type: duckdb + inttest_gcp_postgres: connection: type: gcp_postgres @@ -197,5 +212,6 @@ gateways: enable_iam_auth: true check_import: false + model_defaults: dialect: duckdb diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index 1b7d54a2d9..521b287d12 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -2499,6 +2499,7 @@ def test_dialects(ctx: TestContext): { "default": pd.Timestamp("2020-01-01 00:00:00+00:00"), "clickhouse": pd.Timestamp("2020-01-01 00:00:00"), + "fabric": pd.Timestamp("2020-01-01 00:00:00"), "mysql": pd.Timestamp("2020-01-01 00:00:00"), "spark": pd.Timestamp("2020-01-01 00:00:00"), "databricks": pd.Timestamp("2020-01-01 00:00:00"), @@ -3033,14 +3034,12 @@ def test_value_normalization( input_data: t.Tuple[t.Any, ...], expected_results: t.Tuple[str, ...], ) -> None: - if ( - ctx.dialect == "trino" - and ctx.engine_adapter.current_catalog_type == "hive" - and column_type == exp.DataType.Type.TIMESTAMPTZ - ): - pytest.skip( - "Trino on Hive doesnt support creating tables with TIMESTAMP WITH TIME ZONE fields" - ) + # Skip TIMESTAMPTZ tests for engines that don't support it + if column_type == exp.DataType.Type.TIMESTAMPTZ: + if ctx.dialect == "trino" and ctx.engine_adapter.current_catalog_type == "hive": + pytest.skip("Trino on Hive doesn't support TIMESTAMP WITH TIME ZONE fields") + if ctx.dialect == "fabric": + pytest.skip("Fabric doesn't support TIMESTAMP WITH TIME ZONE fields") if not isinstance(ctx.engine_adapter, RowDiffMixin): pytest.skip( @@ -3130,7 +3129,10 @@ def test_table_diff_grain_check_single_key(ctx: TestContext): src_table = ctx.table("source") target_table = ctx.table("target") - columns_to_types = {"key1": exp.DataType.build("int"), "value": exp.DataType.build("varchar")} + columns_to_types = { + "key1": exp.DataType.build("int"), + "value": exp.DataType.build("varchar"), + } ctx.engine_adapter.create_table(src_table, columns_to_types) ctx.engine_adapter.create_table(target_table, columns_to_types) diff --git a/tests/core/engine_adapter/test_fabric.py b/tests/core/engine_adapter/test_fabric.py new file mode 100644 index 0000000000..6b80ef7337 --- /dev/null +++ b/tests/core/engine_adapter/test_fabric.py @@ -0,0 +1,90 @@ +# type: ignore + +import typing as t + +import pytest +from pytest_mock import MockerFixture +from sqlglot import exp, parse_one + +from sqlmesh.core.engine_adapter import FabricEngineAdapter +from tests.core.engine_adapter import to_sql_calls +from sqlmesh.core.engine_adapter.shared import DataObject + +pytestmark = [pytest.mark.engine, pytest.mark.fabric] + + +@pytest.fixture +def adapter(make_mocked_engine_adapter: t.Callable) -> FabricEngineAdapter: + return make_mocked_engine_adapter(FabricEngineAdapter) + + +def test_columns(adapter: FabricEngineAdapter): + adapter.cursor.fetchall.return_value = [ + ("decimal_ps", "decimal", None, 5, 4), + ("decimal", "decimal", None, 18, 0), + ("float", "float", None, 53, None), + ("char_n", "char", 10, None, None), + ("varchar_n", "varchar", 10, None, None), + ("nvarchar_max", "nvarchar", -1, None, None), + ] + + assert adapter.columns("db.table") == { + "decimal_ps": exp.DataType.build("decimal(5, 4)", dialect=adapter.dialect), + "decimal": exp.DataType.build("decimal(18, 0)", dialect=adapter.dialect), + "float": exp.DataType.build("float(53)", dialect=adapter.dialect), + "char_n": exp.DataType.build("char(10)", dialect=adapter.dialect), + "varchar_n": exp.DataType.build("varchar(10)", dialect=adapter.dialect), + "nvarchar_max": exp.DataType.build("nvarchar(max)", dialect=adapter.dialect), + } + + # Verify that the adapter queries the uppercase INFORMATION_SCHEMA + adapter.cursor.execute.assert_called_once_with( + """SELECT [COLUMN_NAME], [DATA_TYPE], [CHARACTER_MAXIMUM_LENGTH], [NUMERIC_PRECISION], [NUMERIC_SCALE] FROM [INFORMATION_SCHEMA].[COLUMNS] WHERE [TABLE_NAME] = 'table' AND [TABLE_SCHEMA] = 'db';""" + ) + + +def test_table_exists(adapter: FabricEngineAdapter): + adapter.cursor.fetchone.return_value = (1,) + assert adapter.table_exists("db.table") + # Verify that the adapter queries the uppercase INFORMATION_SCHEMA + adapter.cursor.execute.assert_called_once_with( + """SELECT 1 FROM [INFORMATION_SCHEMA].[TABLES] WHERE [TABLE_NAME] = 'table' AND [TABLE_SCHEMA] = 'db';""" + ) + + adapter.cursor.fetchone.return_value = None + assert not adapter.table_exists("db.table") + + +def test_insert_overwrite_by_time_partition(adapter: FabricEngineAdapter): + adapter.insert_overwrite_by_time_partition( + "test_table", + parse_one("SELECT a, b FROM tbl"), + start="2022-01-01", + end="2022-01-02", + time_column="b", + time_formatter=lambda x, _: exp.Literal.string(x.strftime("%Y-%m-%d")), + target_columns_to_types={"a": exp.DataType.build("INT"), "b": exp.DataType.build("STRING")}, + ) + + # Fabric adapter should use DELETE/INSERT strategy, not MERGE. + assert to_sql_calls(adapter) == [ + """DELETE FROM [test_table] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", + """INSERT INTO [test_table] ([a], [b]) SELECT [a], [b] FROM (SELECT [a] AS [a], [b] AS [b] FROM [tbl]) AS [_subquery] WHERE [b] BETWEEN '2022-01-01' AND '2022-01-02';""", + ] + + +def test_replace_query(adapter: FabricEngineAdapter, mocker: MockerFixture): + mocker.patch.object( + adapter, + "_get_data_objects", + return_value=[DataObject(schema="", name="test_table", type="table")], + ) + adapter.replace_query( + "test_table", parse_one("SELECT a FROM tbl"), {"a": exp.DataType.build("int")} + ) + + # This behavior is inherited from MSSQLEngineAdapter and should be TRUNCATE + INSERT + assert to_sql_calls(adapter) == [ + "TRUNCATE TABLE [test_table];", + "INSERT INTO [test_table] ([a]) SELECT [a] FROM [tbl];", + ] diff --git a/tests/core/test_connection_config.py b/tests/core/test_connection_config.py index 7fe2487891..22d21fcef7 100644 --- a/tests/core/test_connection_config.py +++ b/tests/core/test_connection_config.py @@ -12,6 +12,7 @@ ConnectionConfig, DatabricksConnectionConfig, DuckDBAttachOptions, + FabricConnectionConfig, DuckDBConnectionConfig, GCPPostgresConnectionConfig, MotherDuckConnectionConfig, @@ -1687,3 +1688,95 @@ def mock_add_output_converter(sql_type, converter_func): expected_dt = datetime(2023, 1, 1, 12, 0, 0, 0, timezone(timedelta(hours=-8, minutes=0))) assert result == expected_dt assert result.tzinfo == timezone(timedelta(hours=-8)) + + +def test_fabric_connection_config_defaults(make_config): + """Test Fabric connection config defaults to pyodbc and autocommit=True.""" + config = make_config( + type="fabric", + host="localhost", + workspace_id="test-workspace-id", + tenant_id="test-tenant-id", + check_import=False, + ) + assert isinstance(config, FabricConnectionConfig) + assert config.driver == "pyodbc" + assert config.autocommit is True + + # Ensure it creates the FabricEngineAdapter + from sqlmesh.core.engine_adapter.fabric import FabricEngineAdapter + + assert isinstance(config.create_engine_adapter(), FabricEngineAdapter) + + +def test_fabric_connection_config_parameter_validation(make_config): + """Test Fabric connection config parameter validation.""" + # Test that FabricConnectionConfig correctly handles pyodbc-specific parameters. + config = make_config( + type="fabric", + host="localhost", + driver_name="ODBC Driver 18 for SQL Server", + trust_server_certificate=True, + encrypt=False, + odbc_properties={"Authentication": "ActiveDirectoryServicePrincipal"}, + workspace_id="test-workspace-id", + tenant_id="test-tenant-id", + check_import=False, + ) + assert isinstance(config, FabricConnectionConfig) + assert config.driver == "pyodbc" # Driver is fixed to pyodbc + assert config.driver_name == "ODBC Driver 18 for SQL Server" + assert config.trust_server_certificate is True + assert config.encrypt is False + assert config.odbc_properties == {"Authentication": "ActiveDirectoryServicePrincipal"} + + # Test that specifying a different driver for Fabric raises an error + with pytest.raises(ConfigError, match=r"Input should be 'pyodbc'"): + make_config(type="fabric", host="localhost", driver="pymssql", check_import=False) + + +def test_fabric_pyodbc_connection_string_generation(): + """Test that the Fabric pyodbc connection gets invoked with the correct ODBC connection string.""" + with patch("pyodbc.connect") as mock_pyodbc_connect: + # Create a Fabric config + config = FabricConnectionConfig( + host="testserver.datawarehouse.fabric.microsoft.com", + port=1433, + database="testdb", + user="testuser", + password="testpass", + driver_name="ODBC Driver 18 for SQL Server", + trust_server_certificate=True, + encrypt=True, + login_timeout=30, + workspace_id="test-workspace-id", + tenant_id="test-tenant-id", + check_import=False, + ) + + # Get the connection factory with kwargs and call it + factory_with_kwargs = config._connection_factory_with_kwargs + connection = factory_with_kwargs() + + # Verify pyodbc.connect was called with the correct connection string + mock_pyodbc_connect.assert_called_once() + call_args = mock_pyodbc_connect.call_args + + # Check the connection string (first argument) + conn_str = call_args[0][0] + expected_parts = [ + "DRIVER={ODBC Driver 18 for SQL Server}", + "SERVER=testserver.datawarehouse.fabric.microsoft.com,1433", + "DATABASE=testdb", + "Encrypt=YES", + "TrustServerCertificate=YES", + "Connection Timeout=30", + "UID=testuser", + "PWD=testpass", + ] + + for part in expected_parts: + assert part in conn_str + + # Check autocommit parameter, should default to True for Fabric + assert call_args[1]["autocommit"] is True