Skip to content

Commit 548e395

Browse files
introduce column normalisation for SEA metadata queries
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent dfbbf79 commit 548e395

File tree

4 files changed

+376
-0
lines changed

4 files changed

+376
-0
lines changed
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
"""
2+
Column name mappings between different backend protocols.
3+
4+
This module provides mappings between column names returned by different backends
5+
to ensure a consistent interface for metadata operations.
6+
"""
7+
8+
from enum import Enum
9+
10+
11+
class MetadataOp(Enum):
    """Identifies which metadata operation produced a result set.

    The value of each member is the lowercase operation name; members are
    used as keys when selecting the column-name mapping to apply.
    """

    CATALOGS = "catalogs"
    SCHEMAS = "schemas"
    TABLES = "tables"
    COLUMNS = "columns"
18+
19+
20+
# Per-operation translations from backend-reported column names to the
# standard (JDBC-style) column names exposed to callers.

# Catalog listings: one renamed column.
CATALOG_OP = dict(
    catalog="TABLE_CAT",
)

# Schema listings.
SCHEMA_OP = dict(
    databaseName="TABLE_SCHEM",
    catalogName="TABLE_CATALOG",
)

# Table listings.
TABLE_OP = dict(
    catalogName="TABLE_CAT",
    namespace="TABLE_SCHEM",
    tableName="TABLE_NAME",
    tableType="TABLE_TYPE",
    remarks="REMARKS",
    TYPE_CATALOG_COLUMN="TYPE_CAT",
    TYPE_SCHEMA_COLUMN="TYPE_SCHEM",
    TYPE_NAME="TYPE_NAME",
    SELF_REFERENCING_COLUMN_NAME="SELF_REFERENCING_COL_NAME",
    REF_GENERATION_COLUMN="REF_GENERATION",
)

# Column listings.
COLUMN_OP = dict(
    catalogName="TABLE_CAT",
    namespace="TABLE_SCHEM",
    tableName="TABLE_NAME",
    columnName="COLUMN_NAME",
    dataType="DATA_TYPE",
    columnType="TYPE_NAME",
    columnSize="COLUMN_SIZE",
    bufferLength="BUFFER_LENGTH",
    decimalDigits="DECIMAL_DIGITS",
    radix="NUM_PREC_RADIX",
    nullable="NULLABLE",
    remarks="REMARKS",
    columnDef="COLUMN_DEF",
    sqlDataType="SQL_DATA_TYPE",
    sqlDatetimeSub="SQL_DATETIME_SUB",
    charOctetLength="CHAR_OCTET_LENGTH",
    ordinalPosition="ORDINAL_POSITION",
    isNullable="IS_NULLABLE",
    scopeCatalog="SCOPE_CATALOG",
    scopeSchema="SCOPE_SCHEMA",
    scopeTable="SCOPE_TABLE",
    sourceDataType="SOURCE_DATA_TYPE",
    isAutoIncrement="IS_AUTOINCREMENT",
    isGenerated="IS_GENERATEDCOLUMN",
)
69+
70+
71+
def normalise_metadata_result(result_set, operation: MetadataOp):
    """
    Rewrite the column names in *result_set*'s description to the standard
    names for the given metadata operation.

    The result set is modified in place. Operations without a registered
    mapping leave the description untouched.

    Args:
        result_set: Result set whose ``description`` attribute is rewritten.
        operation: The metadata operation (from MetadataOp enum).
    """
    # Dispatch to the mapping registered for this operation, if any.
    mapping = {
        MetadataOp.CATALOGS: CATALOG_OP,
        MetadataOp.SCHEMAS: SCHEMA_OP,
        MetadataOp.TABLES: TABLE_OP,
        MetadataOp.COLUMNS: COLUMN_OP,
    }.get(operation)
    if mapping is None:
        return

    # Rebuild each column descriptor, substituting the standard name when
    # the first element (the column name) has a registered translation;
    # descriptors without a translation are kept as-is.
    result_set.description = [
        (mapping[desc[0]],) + desc[1:] if desc[0] in mapping else desc
        for desc in result_set.description
    ]

src/databricks/sql/backend/sea/backend.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
from databricks.sql.backend.sea.utils.http_client import SeaHttpClient
3232
from databricks.sql.types import SSLOptions
3333

34+
# Import the column mapping module
35+
from databricks.sql.backend.column_mapping import normalise_metadata_result, MetadataOp
36+
3437
from databricks.sql.backend.sea.models import (
3538
ExecuteStatementRequest,
3639
GetStatementRequest,
@@ -681,6 +684,9 @@ def get_catalogs(
681684
enforce_embedded_schema_correctness=False,
682685
)
683686
assert result is not None, "execute_command returned None in synchronous mode"
687+
688+
normalise_metadata_result(result, MetadataOp.CATALOGS)
689+
684690
return result
685691

686692
def get_schemas(
@@ -714,6 +720,9 @@ def get_schemas(
714720
enforce_embedded_schema_correctness=False,
715721
)
716722
assert result is not None, "execute_command returned None in synchronous mode"
723+
724+
normalise_metadata_result(result, MetadataOp.SCHEMAS)
725+
717726
return result
718727

719728
def get_tables(
@@ -761,6 +770,8 @@ def get_tables(
761770

762771
result = ResultSetFilter.filter_tables_by_type(result, table_types)
763772

773+
normalise_metadata_result(result, MetadataOp.TABLES)
774+
764775
return result
765776

766777
def get_columns(
@@ -802,4 +813,7 @@ def get_columns(
802813
enforce_embedded_schema_correctness=False,
803814
)
804815
assert result is not None, "execute_command returned None in synchronous mode"
816+
817+
normalise_metadata_result(result, MetadataOp.COLUMNS)
818+
805819
return result
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
"""
2+
Tests for the column mapping module.
3+
"""
4+
5+
import pytest
6+
from unittest.mock import MagicMock
7+
from enum import Enum
8+
9+
from databricks.sql.backend.column_mapping import (
10+
normalise_metadata_result,
11+
MetadataOp,
12+
CATALOG_OP,
13+
SCHEMA_OP,
14+
TABLE_OP,
15+
COLUMN_OP,
16+
)
17+
18+
19+
class TestColumnMapping:
    """Tests for the column mapping module."""

    @staticmethod
    def _result_with_columns(names):
        """Build a mock result set whose description lists the given names."""
        result = MagicMock()
        result.description = [
            (name, "string", None, None, None, None, True) for name in names
        ]
        return result

    @staticmethod
    def _column_names(result):
        """Extract just the column names from a result set's description."""
        return [desc[0] for desc in result.description]

    def test_normalize_metadata_result_catalogs(self):
        """Catalog column names are normalised; unknown names pass through."""
        result = self._result_with_columns(["catalog", "other_column"])

        normalise_metadata_result(result, MetadataOp.CATALOGS)

        assert self._column_names(result) == ["TABLE_CAT", "other_column"]

    def test_normalize_metadata_result_schemas(self):
        """Schema column names are normalised; unknown names pass through."""
        result = self._result_with_columns(
            ["databaseName", "catalogName", "other_column"]
        )

        normalise_metadata_result(result, MetadataOp.SCHEMAS)

        assert self._column_names(result) == [
            "TABLE_SCHEM",
            "TABLE_CATALOG",
            "other_column",
        ]

    def test_normalize_metadata_result_tables(self):
        """Table column names are normalised; unknown names pass through."""
        result = self._result_with_columns(
            [
                "catalogName",
                "namespace",
                "tableName",
                "tableType",
                "remarks",
                "TYPE_CATALOG_COLUMN",
                "TYPE_SCHEMA_COLUMN",
                "TYPE_NAME",
                "SELF_REFERENCING_COLUMN_NAME",
                "REF_GENERATION_COLUMN",
                "other_column",
            ]
        )

        normalise_metadata_result(result, MetadataOp.TABLES)

        assert self._column_names(result) == [
            "TABLE_CAT",
            "TABLE_SCHEM",
            "TABLE_NAME",
            "TABLE_TYPE",
            "REMARKS",
            "TYPE_CAT",
            "TYPE_SCHEM",
            "TYPE_NAME",
            "SELF_REFERENCING_COL_NAME",
            "REF_GENERATION",
            "other_column",
        ]

    def test_normalize_metadata_result_columns(self):
        """Column-metadata names are normalised; unknown names pass through."""
        result = self._result_with_columns(
            [
                "catalogName",
                "namespace",
                "tableName",
                "columnName",
                "dataType",
                "columnType",
                "columnSize",
                "bufferLength",
                "decimalDigits",
                "radix",
                "nullable",
                "remarks",
                "columnDef",
                "sqlDataType",
                "sqlDatetimeSub",
                "charOctetLength",
                "ordinalPosition",
                "isNullable",
                "scopeCatalog",
                "scopeSchema",
                "scopeTable",
                "sourceDataType",
                "isAutoIncrement",
                "isGenerated",
                "other_column",
            ]
        )

        normalise_metadata_result(result, MetadataOp.COLUMNS)

        assert self._column_names(result) == [
            "TABLE_CAT",
            "TABLE_SCHEM",
            "TABLE_NAME",
            "COLUMN_NAME",
            "DATA_TYPE",
            "TYPE_NAME",
            "COLUMN_SIZE",
            "BUFFER_LENGTH",
            "DECIMAL_DIGITS",
            "NUM_PREC_RADIX",
            "NULLABLE",
            "REMARKS",
            "COLUMN_DEF",
            "SQL_DATA_TYPE",
            "SQL_DATETIME_SUB",
            "CHAR_OCTET_LENGTH",
            "ORDINAL_POSITION",
            "IS_NULLABLE",
            "SCOPE_CATALOG",
            "SCOPE_SCHEMA",
            "SCOPE_TABLE",
            "SOURCE_DATA_TYPE",
            "IS_AUTOINCREMENT",
            "IS_GENERATEDCOLUMN",
            "other_column",
        ]

    def test_normalize_metadata_result_unknown_operation(self):
        """An operation without a registered mapping leaves the description unchanged."""
        result = self._result_with_columns(["column1", "column2"])
        original_description = result.description.copy()

        # An enum distinct from MetadataOp, so no mapping can match.
        class TestOp(Enum):
            UNKNOWN = "unknown"

        normalise_metadata_result(result, TestOp.UNKNOWN)

        assert result.description == original_description

    def test_normalize_metadata_result_preserves_other_fields(self):
        """Normalisation replaces only the name; all other descriptor fields survive."""
        result = MagicMock()
        result.description = [
            (
                "catalog",
                "string",
                "display_size",
                "internal_size",
                "precision",
                "scale",
                True,
            ),
        ]

        normalise_metadata_result(result, MetadataOp.CATALOGS)

        assert result.description[0] == (
            "TABLE_CAT",
            "string",
            "display_size",
            "internal_size",
            "precision",
            "scale",
            True,
        )

0 commit comments

Comments
 (0)