From 7e2302b7c585d7baad045e98d1c9c172df23f01f Mon Sep 17 00:00:00 2001 From: Tom Samaras Date: Mon, 22 Sep 2025 17:15:29 -0400 Subject: [PATCH 1/8] hive - parse ALTER table CHANGE COLUMN syntax --- sqlglot/dialects/dialect.py | 7 +++++ sqlglot/dialects/hive.py | 59 +++++++++++++++++++++++++++++++++++++ sqlglot/expressions.py | 2 ++ sqlglot/generator.py | 3 +- sqlglot/parser.py | 7 ++++- tests/dialects/test_hive.py | 13 ++++++++ 6 files changed, 89 insertions(+), 2 deletions(-) diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index dedcc8067b..e478da8a08 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -525,6 +525,13 @@ class Dialect(metaclass=_Dialect): equivalent of CREATE SCHEMA is CREATE DATABASE. """ + ALTER_TABLE_SUPPORTS_CASCADE = False + """ + Hive by default does not update the schema of existing partitions when a column is changed. + The CASCADE clause is used to indicate that the change should be propagated to all existing partitions. + The Spark dialect, while derived from Hive, does not support the CASCADE clause. 
+ """ + # Whether ADD is present for each column added by ALTER TABLE ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index 2573223419..b6e55c7271 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -211,6 +211,7 @@ class Hive(Dialect): SAFE_DIVISION = True ARRAY_AGG_INCLUDES_NULLS = None REGEXP_EXTRACT_DEFAULT_GROUP = 1 + ALTER_TABLE_SUPPORTS_CASCADE = True # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE @@ -310,6 +311,7 @@ class Parser(parser.Parser): VALUES_FOLLOWED_BY_PAREN = False JOINS_HAVE_EQUAL_PRECEDENCE = True ADD_JOIN_ON_TRUE = True + ALTER_TABLE_PARTITIONS = True FUNCTIONS = { **parser.Parser.FUNCTIONS, @@ -378,6 +380,17 @@ class Parser(parser.Parser): ), } + ALTER_PARSERS = { + "ADD": lambda self: self._parse_alter_table_add(), + "AS": lambda self: self._parse_select(), + "CHANGE": lambda self: self._parse_alter_table_change(), + "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), + "DROP": lambda self: self._parse_alter_table_drop(), + "PARTITION": lambda self: self._parse_alter_table_partition(), + "RENAME": lambda self: self._parse_alter_table_rename(), + "SET": lambda self: self._parse_alter_table_set(), + } + def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]: if not self._match(TokenType.L_PAREN, advance=False): self._retreat(self._index - 1) @@ -451,6 +464,29 @@ def _parse_types( return this + def _parse_alter_table_change(self) -> t.List[exp.Expression]: + self._match(TokenType.COLUMN) + column_old = self._parse_column() + column_new = self._parse_column() + dtype = self._parse_types(schema=True) + + comment = None + if self._match(TokenType.COMMENT): + comment = self._parse_string() + + if not column_old or not column_new or not dtype: + return [] + + return [ + self.expression( + exp.AlterColumn, + this=column_old, + 
rename_to=column_new, + dtype=dtype, + comment=comment, + ) + ] + def _parse_partition_and_order( self, ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: @@ -500,6 +536,7 @@ class Generator(generator.Generator): PAD_FILL_PATTERN_IS_REQUIRED = True SUPPORTS_MEDIAN = False ARRAY_SIZE_NAME = "SIZE" + ALTER_SET_TYPE = "" EXPRESSIONS_WITHOUT_NESTED_CTES = { exp.Insert, @@ -757,6 +794,28 @@ def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: ), ) + def altercolumn_sql(self, expression: exp.AlterColumn) -> str: + this = self.sql(expression, "this") + + new_name = self.sql(expression, "rename_to") + + dtype = self.sql(expression, "dtype") + if dtype: + text = f"CHANGE COLUMN {this} {new_name} {dtype}" + + comment = self.sql(expression, "comment") + if comment: + text = text + f" COMMENT {comment}" + + default = self.sql(expression, "default") + visible = expression.args.get("visible") + allow_null = expression.args.get("allow_null") + drop = expression.args.get("drop") + + if any([default, drop, visible, allow_null, drop]): + self.unsupported("Unsupported CHANGE COLUMN syntax") + return text + def alterset_sql(self, expression: exp.AlterSet) -> str: exprs = self.expressions(expression, flat=True) exprs = f" {exprs}" if exprs else "" diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py index 157857992c..9f3020781c 100644 --- a/sqlglot/expressions.py +++ b/sqlglot/expressions.py @@ -1838,6 +1838,7 @@ class AlterColumn(Expression): "comment": False, "allow_null": False, "visible": False, + "rename_to": False, } @@ -4956,6 +4957,7 @@ class Alter(Expression): "cluster": False, "not_valid": False, "check": False, + "cascade": False, } @property diff --git a/sqlglot/generator.py b/sqlglot/generator.py index aef7712c61..bbe67be98a 100644 --- a/sqlglot/generator.py +++ b/sqlglot/generator.py @@ -3621,10 +3621,11 @@ def alter_sql(self, expression: exp.Alter) -> str: kind = self.sql(expression, "kind") not_valid = " NOT VALID" if 
expression.args.get("not_valid") else "" check = " WITH CHECK" if expression.args.get("check") else "" + cascade = " CASCADE" if expression.args.get("cascade") and self.dialect.ALTER_TABLE_SUPPORTS_CASCADE else "" this = self.sql(expression, "this") this = f" {this}" if this else "" - return f"ALTER {kind}{exists}{only}{this}{on_cluster}{check}{self.sep()}{actions_sql}{not_valid}{options}" + return f"ALTER {kind}{exists}{only}{this}{on_cluster}{check}{self.sep()}{actions_sql}{not_valid}{options}{cascade}" def altersession_sql(self, expression: exp.AlterSession) -> str: items_sql = self.expressions(expression, flat=True) diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 277cb76c72..9eac872c1d 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -1535,6 +1535,9 @@ def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression] # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword ALTER_RENAME_REQUIRES_COLUMN = True + # Whether Alter statements are allowed to contain Partition specifications + ALTER_TABLE_PARTITIONS = False + # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is # to say, JOIN operators happen before comma operators. 
This is not the case in some dialects, such @@ -7716,7 +7719,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command: check = None cluster = None else: - this = self._parse_table(schema=True) + this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) check = self._match_text_seq("WITH", "CHECK") cluster = self._parse_on_property() if self._match(TokenType.ON) else None @@ -7728,6 +7731,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command: actions = ensure_list(parser(self)) not_valid = self._match_text_seq("NOT", "VALID") options = self._parse_csv(self._parse_property) + cascade = (self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")) if not self._curr and actions: return self.expression( @@ -7741,6 +7745,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command: cluster=cluster, not_valid=not_valid, check=check, + cascade=cascade, ) return self._parse_as_command(start) diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index d0998a5583..0dba8a31ad 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -176,6 +176,19 @@ def test_ddl(self): }, ) + self.validate_identity("ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)") + self.validate_identity("ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)") + self.validate_identity("ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'") + self.validate_identity("ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)") + + self.validate_identity( + "ALTER TABLE x CHANGE a a VARCHAR(10)", + write_sql="ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)" + ) + + self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING)") + self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING) CASCADE") + self.validate_identity( """CREATE EXTERNAL TABLE x (y INT) ROW FORMAT SERDE 'serde' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' WITH SERDEPROPERTIES ('input.regex'='')""", ) From c535df32558155dfaaac02474b94df1412d144b3 Mon Sep 17 
00:00:00 2001 From: Tom Samaras Date: Tue, 23 Sep 2025 14:16:46 -0400 Subject: [PATCH 2/8] spark - parse alter table change column syntax --- sqlglot/dialects/hive.py | 50 +++++++++++++++++++++++++----------- sqlglot/dialects/spark2.py | 8 ++++++ tests/dialects/test_hive.py | 26 ++++++++++++++++--- tests/dialects/test_spark.py | 22 ++++++++++++++++ 4 files changed, 87 insertions(+), 19 deletions(-) diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index b6e55c7271..d21e786aa4 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -212,6 +212,7 @@ class Hive(Dialect): ARRAY_AGG_INCLUDES_NULLS = None REGEXP_EXTRACT_DEFAULT_GROUP = 1 ALTER_TABLE_SUPPORTS_CASCADE = True + CHANGE_COLUMN_STYLE = "HIVE" # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE @@ -466,7 +467,15 @@ def _parse_types( def _parse_alter_table_change(self) -> t.List[exp.Expression]: self._match(TokenType.COLUMN) - column_old = self._parse_column() + this = self._parse_column() + + if self.dialect.CHANGE_COLUMN_STYLE == "SPARK" and self._match_text_seq("TYPE"): + return [self.expression( + exp.AlterColumn, + this=this, + dtype=self._parse_types(schema=True), + )] + column_new = self._parse_column() dtype = self._parse_types(schema=True) @@ -474,13 +483,13 @@ def _parse_alter_table_change(self) -> t.List[exp.Expression]: if self._match(TokenType.COMMENT): comment = self._parse_string() - if not column_old or not column_new or not dtype: - return [] + if not this or not column_new or not dtype: + self.raise_error("Expected 'CHANGE COLUMN' to be followed by 'column_name' 'column_name' 'data_type'") return [ self.expression( exp.AlterColumn, - this=column_old, + this=this, rename_to=column_new, dtype=dtype, comment=comment, @@ -795,18 +804,11 @@ def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: ) def altercolumn_sql(self, expression: exp.AlterColumn) -> str: - 
this = self.sql(expression, "this") - - new_name = self.sql(expression, "rename_to") + this = self.sql(expression, "this") + new_name = self.sql(expression, "rename_to") or this dtype = self.sql(expression, "dtype") - if dtype: - text = f"CHANGE COLUMN {this} {new_name} {dtype}" - - comment = self.sql(expression, "comment") - if comment: - text = text + f" COMMENT {comment}" - + comment = f" COMMENT {self.sql(expression, 'comment')}" if self.sql(expression, "comment") else "" default = self.sql(expression, "default") visible = expression.args.get("visible") allow_null = expression.args.get("allow_null") @@ -814,7 +816,17 @@ def altercolumn_sql(self, expression: exp.AlterColumn) -> str: if any([default, drop, visible, allow_null, drop]): self.unsupported("Unsupported CHANGE COLUMN syntax") - return text + if self.dialect.CHANGE_COLUMN_STYLE == "SPARK": + if new_name == this: + if comment: + return f"ALTER COLUMN {this}{comment}" + return super().altercolumn_sql(expression) + return f"RENAME COLUMN {this} TO {new_name}" + + if not dtype: + self.unsupported("CHANGE COLUMN without a type is not supported") + + return f"CHANGE COLUMN {this} {new_name} {dtype}{comment}" def alterset_sql(self, expression: exp.AlterSet) -> str: exprs = self.expressions(expression, flat=True) @@ -830,6 +842,14 @@ def alterset_sql(self, expression: exp.AlterSet) -> str: return f"SET{serde}{exprs}{location}{file_format}{tags}" + def alter_sql(self, expression: exp.Alter) -> str: + if self.dialect.CHANGE_COLUMN_STYLE == "HIVE": + actions = expression.args["actions"] + for action in actions: + if isinstance(action, exp.RenameColumn): + self.unsupported("Cannot rename columns without data type defined in Hive") + return super().alter_sql(expression) + def serdeproperties_sql(self, expression: exp.SerdeProperties) -> str: prefix = "WITH " if expression.args.get("with") else "" exprs = self.expressions(expression, flat=True) diff --git a/sqlglot/dialects/spark2.py b/sqlglot/dialects/spark2.py 
index 5b8625edc0..2cb8f3ff30 100644 --- a/sqlglot/dialects/spark2.py +++ b/sqlglot/dialects/spark2.py @@ -151,6 +151,8 @@ def _annotate_by_similar_args( class Spark2(Hive): + CHANGE_COLUMN_STYLE = "SPARK" + ANNOTATORS = { **Hive.ANNOTATORS, exp.Substring: lambda self, e: self._annotate_by_args(e, "this"), @@ -234,6 +236,11 @@ class Parser(Hive.Parser): "SHUFFLE_REPLICATE_NL": lambda self: self._parse_join_hint("SHUFFLE_REPLICATE_NL"), } + ALTER_PARSERS = { + **Hive.Parser.ALTER_PARSERS, + "ALTER": lambda self: self._parse_alter_table_alter(), + } + def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: return self._match_text_seq("DROP", "COLUMNS") and self.expression( exp.Drop, this=self._parse_schema(), kind="COLUMNS" @@ -248,6 +255,7 @@ class Generator(Hive.Generator): QUERY_HINTS = True NVL2_SUPPORTED = True CAN_IMPLEMENT_ARRAY_ANY = True + ALTER_SET_TYPE = "TYPE" PROPERTIES_LOCATION = { **Hive.Generator.PROPERTIES_LOCATION, diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index 0dba8a31ad..32a730308e 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -177,15 +177,33 @@ def test_ddl(self): ) self.validate_identity("ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)") - self.validate_identity("ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)") - self.validate_identity("ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'") - self.validate_identity("ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)") - self.validate_identity( "ALTER TABLE x CHANGE a a VARCHAR(10)", write_sql="ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)" ) + self.validate_all( + "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)", + write={ + "hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)", + "spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)", + }, + ) + self.validate_all( + "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'", + write={ + "hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'", + 
"spark": "ALTER TABLE x ALTER COLUMN a COMMENT 'comment'", + }, + ) + self.validate_all( + "ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)", + write={ + "hive": "ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)", + "spark": "ALTER TABLE x RENAME COLUMN a TO b", + } + ) + self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING)") self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING) CASCADE") diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index 0d1ebd5bbc..9dbba66713 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -2,6 +2,7 @@ from sqlglot import exp, parse_one from sqlglot.dialects.dialect import Dialects +from sqlglot.errors import UnsupportedError from tests.dialects.test_dialect import Validator @@ -132,6 +133,27 @@ def test_ddl(self): "spark": "ALTER TABLE StudentInfo ADD COLUMNS (LastName STRING, DOB TIMESTAMP)", }, ) + self.validate_all( + "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT", + write={ + "spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT", + "hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT", + }, + ) + self.validate_all( + "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT", + write={ + "spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT", + "hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT", + }, + ) + self.validate_all( + "ALTER TABLE db.example RENAME COLUMN col_a TO col_b", + write={ + "spark": "ALTER TABLE db.example RENAME COLUMN col_a TO col_b", + "hive": UnsupportedError, + } + ) self.validate_all( "ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)", write={ From fe81285b094cf874a80471ebb0b65d825843aa49 Mon Sep 17 00:00:00 2001 From: Tom Samaras Date: Tue, 23 Sep 2025 14:30:57 -0400 Subject: [PATCH 3/8] hive - support cascade clause in change column statement --- sqlglot/dialects/hive.py | 28 +++++++++++++++------------- sqlglot/expressions.py | 1 + tests/dialects/test_hive.py | 7 +++++++ 3 
files changed, 23 insertions(+), 13 deletions(-) diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index d21e786aa4..fada95963a 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -465,16 +465,16 @@ def _parse_types( return this - def _parse_alter_table_change(self) -> t.List[exp.Expression]: + def _parse_alter_table_change(self) -> t.Optional[exp.Expression]: self._match(TokenType.COLUMN) this = self._parse_column() if self.dialect.CHANGE_COLUMN_STYLE == "SPARK" and self._match_text_seq("TYPE"): - return [self.expression( + return self.expression( exp.AlterColumn, this=this, dtype=self._parse_types(schema=True), - )] + ) column_new = self._parse_column() dtype = self._parse_types(schema=True) @@ -483,18 +483,19 @@ def _parse_alter_table_change(self) -> t.List[exp.Expression]: if self._match(TokenType.COMMENT): comment = self._parse_string() + cascade = self._match_text_seq("CASCADE") + if not this or not column_new or not dtype: self.raise_error("Expected 'CHANGE COLUMN' to be followed by 'column_name' 'column_name' 'data_type'") - return [ - self.expression( - exp.AlterColumn, - this=this, - rename_to=column_new, - dtype=dtype, - comment=comment, - ) - ] + return self.expression( + exp.AlterColumn, + this=this, + rename_to=column_new, + dtype=dtype, + comment=comment, + cascade=cascade + ) def _parse_partition_and_order( self, @@ -809,6 +810,7 @@ def altercolumn_sql(self, expression: exp.AlterColumn) -> str: new_name = self.sql(expression, "rename_to") or this dtype = self.sql(expression, "dtype") comment = f" COMMENT {self.sql(expression, 'comment')}" if self.sql(expression, "comment") else "" + cascade = " CASCADE" if expression.args.get("cascade") else "" default = self.sql(expression, "default") visible = expression.args.get("visible") allow_null = expression.args.get("allow_null") @@ -826,7 +828,7 @@ def altercolumn_sql(self, expression: exp.AlterColumn) -> str: if not dtype: self.unsupported("CHANGE COLUMN without a type 
is not supported") - return f"CHANGE COLUMN {this} {new_name} {dtype}{comment}" + return f"CHANGE COLUMN {this} {new_name} {dtype}{comment}{cascade}" def alterset_sql(self, expression: exp.AlterSet) -> str: exprs = self.expressions(expression, flat=True) diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py index 9f3020781c..c815e4b1d2 100644 --- a/sqlglot/expressions.py +++ b/sqlglot/expressions.py @@ -1839,6 +1839,7 @@ class AlterColumn(Expression): "allow_null": False, "visible": False, "rename_to": False, + "cascade": False, } diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index 32a730308e..9f6df0fd62 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -203,6 +203,13 @@ def test_ddl(self): "spark": "ALTER TABLE x RENAME COLUMN a TO b", } ) + self.validate_all( + "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE", + write={ + "hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE", + "spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)", + }, + ) self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING)") self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING) CASCADE") From 50cf2a971bc292c39318c87c428d6936a94ac91c Mon Sep 17 00:00:00 2001 From: Tom Samaras Date: Tue, 23 Sep 2025 15:05:59 -0400 Subject: [PATCH 4/8] cleanup cascade logic --- sqlglot/dialects/hive.py | 6 +----- sqlglot/dialects/spark2.py | 1 + sqlglot/expressions.py | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index fada95963a..dca1ca1599 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -483,8 +483,6 @@ def _parse_alter_table_change(self) -> t.Optional[exp.Expression]: if self._match(TokenType.COMMENT): comment = self._parse_string() - cascade = self._match_text_seq("CASCADE") - if not this or not column_new or not dtype: self.raise_error("Expected 'CHANGE COLUMN' to be followed by 'column_name' 'column_name' 
'data_type'") @@ -494,7 +492,6 @@ def _parse_alter_table_change(self) -> t.Optional[exp.Expression]: rename_to=column_new, dtype=dtype, comment=comment, - cascade=cascade ) def _parse_partition_and_order( @@ -810,7 +807,6 @@ def altercolumn_sql(self, expression: exp.AlterColumn) -> str: new_name = self.sql(expression, "rename_to") or this dtype = self.sql(expression, "dtype") comment = f" COMMENT {self.sql(expression, 'comment')}" if self.sql(expression, "comment") else "" - cascade = " CASCADE" if expression.args.get("cascade") else "" default = self.sql(expression, "default") visible = expression.args.get("visible") allow_null = expression.args.get("allow_null") @@ -828,7 +824,7 @@ def altercolumn_sql(self, expression: exp.AlterColumn) -> str: if not dtype: self.unsupported("CHANGE COLUMN without a type is not supported") - return f"CHANGE COLUMN {this} {new_name} {dtype}{comment}{cascade}" + return f"CHANGE COLUMN {this} {new_name} {dtype}{comment}" def alterset_sql(self, expression: exp.AlterSet) -> str: exprs = self.expressions(expression, flat=True) diff --git a/sqlglot/dialects/spark2.py b/sqlglot/dialects/spark2.py index 2cb8f3ff30..f2ffcf7968 100644 --- a/sqlglot/dialects/spark2.py +++ b/sqlglot/dialects/spark2.py @@ -151,6 +151,7 @@ def _annotate_by_similar_args( class Spark2(Hive): + ALTER_TABLE_SUPPORTS_CASCADE = False CHANGE_COLUMN_STYLE = "SPARK" ANNOTATORS = { diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py index c815e4b1d2..9f3020781c 100644 --- a/sqlglot/expressions.py +++ b/sqlglot/expressions.py @@ -1839,7 +1839,6 @@ class AlterColumn(Expression): "allow_null": False, "visible": False, "rename_to": False, - "cascade": False, } From a25412ff7072c1a8fe85fe0a2bdb7ee19a66721c Mon Sep 17 00:00:00 2001 From: Tom Samaras Date: Tue, 23 Sep 2025 15:16:38 -0400 Subject: [PATCH 5/8] document constant --- sqlglot/dialects/hive.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index 
dca1ca1599..8a40ab7657 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -213,6 +213,12 @@ class Hive(Dialect): REGEXP_EXTRACT_DEFAULT_GROUP = 1 ALTER_TABLE_SUPPORTS_CASCADE = True CHANGE_COLUMN_STYLE = "HIVE" + """ + Spark and its derivatives support both the Hive style CHANGE COLUMN syntax and more traditional ALTER/RENAME COLUMN syntax. Spark also + accepts ALTER COLUMN syntax when using the CHANGE COLUMN keyword but not vice versa. The Parser needs to be able to handle both styles + for Spark dialects and the Generator will use ALTER COLUMN syntax when possible. This also means that there are circumstances where it's + not possible to transpile commands from Spark to Hive due to missing information (e.g. missing data types). + """ # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE From 9e88a64ef29bf19e454f17467c4f8248d0655c25 Mon Sep 17 00:00:00 2001 From: Tom Samaras Date: Fri, 3 Oct 2025 13:56:00 -0400 Subject: [PATCH 6/8] fix formatting --- sqlglot/dialects/hive.py | 11 ++++++++--- sqlglot/generator.py | 6 +++++- sqlglot/parser.py | 2 +- tests/dialects/test_hive.py | 4 ++-- tests/dialects/test_spark.py | 2 +- 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index 8a40ab7657..d89cb6d121 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -490,7 +490,9 @@ def _parse_alter_table_change(self) -> t.Optional[exp.Expression]: comment = self._parse_string() if not this or not column_new or not dtype: - self.raise_error("Expected 'CHANGE COLUMN' to be followed by 'column_name' 'column_name' 'data_type'") + self.raise_error( + "Expected 'CHANGE COLUMN' to be followed by 'column_name' 'column_name' 'data_type'" + ) return self.expression( exp.AlterColumn, @@ -808,11 +810,14 @@ def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: ) def 
altercolumn_sql(self, expression: exp.AlterColumn) -> str: - this = self.sql(expression, "this") new_name = self.sql(expression, "rename_to") or this dtype = self.sql(expression, "dtype") - comment = f" COMMENT {self.sql(expression, 'comment')}" if self.sql(expression, "comment") else "" + comment = ( + f" COMMENT {self.sql(expression, 'comment')}" + if self.sql(expression, "comment") + else "" + ) default = self.sql(expression, "default") visible = expression.args.get("visible") allow_null = expression.args.get("allow_null") diff --git a/sqlglot/generator.py b/sqlglot/generator.py index bbe67be98a..7f32255e55 100644 --- a/sqlglot/generator.py +++ b/sqlglot/generator.py @@ -3621,7 +3621,11 @@ def alter_sql(self, expression: exp.Alter) -> str: kind = self.sql(expression, "kind") not_valid = " NOT VALID" if expression.args.get("not_valid") else "" check = " WITH CHECK" if expression.args.get("check") else "" - cascade = " CASCADE" if expression.args.get("cascade") and self.dialect.ALTER_TABLE_SUPPORTS_CASCADE else "" + cascade = ( + " CASCADE" + if expression.args.get("cascade") and self.dialect.ALTER_TABLE_SUPPORTS_CASCADE + else "" + ) this = self.sql(expression, "this") this = f" {this}" if this else "" diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 9eac872c1d..e5301e599e 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -7731,7 +7731,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command: actions = ensure_list(parser(self)) not_valid = self._match_text_seq("NOT", "VALID") options = self._parse_csv(self._parse_property) - cascade = (self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")) + cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") if not self._curr and actions: return self.expression( diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index 9f6df0fd62..bad3b99aab 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -179,7 +179,7 @@ 
def test_ddl(self): self.validate_identity("ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)") self.validate_identity( "ALTER TABLE x CHANGE a a VARCHAR(10)", - write_sql="ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)" + write_sql="ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)", ) self.validate_all( @@ -201,7 +201,7 @@ def test_ddl(self): write={ "hive": "ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)", "spark": "ALTER TABLE x RENAME COLUMN a TO b", - } + }, ) self.validate_all( "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE", diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index 9dbba66713..553b248048 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -152,7 +152,7 @@ def test_ddl(self): write={ "spark": "ALTER TABLE db.example RENAME COLUMN col_a TO col_b", "hive": UnsupportedError, - } + }, ) self.validate_all( "ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)", From d98a522f18603ecadd8fe07a9b09e2393c850691 Mon Sep 17 00:00:00 2001 From: Tom Samaras Date: Tue, 7 Oct 2025 13:25:41 -0400 Subject: [PATCH 7/8] fixes for PR comments * updated syntax handling constant name * Inherit ALTER_PARSERS from parser.Parser * fix column name parsing * revised rename handling --- sqlglot/dialects/hive.py | 46 ++++++++++--------------------------- sqlglot/dialects/spark2.py | 20 +++++++++++----- tests/dialects/test_hive.py | 2 +- 3 files changed, 27 insertions(+), 41 deletions(-) diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index d89cb6d121..dd68899a42 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -212,13 +212,6 @@ class Hive(Dialect): ARRAY_AGG_INCLUDES_NULLS = None REGEXP_EXTRACT_DEFAULT_GROUP = 1 ALTER_TABLE_SUPPORTS_CASCADE = True - CHANGE_COLUMN_STYLE = "HIVE" - """ - Spark and its derivatives support both the Hive style CHANGE COLUMN syntax and more traditional ALTER/RENAME COLUMN syntax. 
Spark also - accepts ALTER COLUMN syntax when using the CHANGE COLUMN keyword but not vice versa. The Parser needs to be able to handle both styles - for Spark dialects and the Generator will use ALTER COLUMN syntax when possible. This also means that there are circumstances where it's - not possible to transpile commands from Spark to Hive due to missing information (e.g. missing data types). - """ # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE @@ -320,6 +313,9 @@ class Parser(parser.Parser): ADD_JOIN_ON_TRUE = True ALTER_TABLE_PARTITIONS = True + CHANGE_COLUMN_ALTER_SYNTAX = False + # Whether the dialect supports using ALTER COLUMN syntax with CHANGE COLUMN. + FUNCTIONS = { **parser.Parser.FUNCTIONS, "BASE64": exp.ToBase64.from_arg_list, @@ -388,14 +384,8 @@ class Parser(parser.Parser): } ALTER_PARSERS = { - "ADD": lambda self: self._parse_alter_table_add(), - "AS": lambda self: self._parse_select(), + **parser.Parser.ALTER_PARSERS, "CHANGE": lambda self: self._parse_alter_table_change(), - "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), - "DROP": lambda self: self._parse_alter_table_drop(), - "PARTITION": lambda self: self._parse_alter_table_partition(), - "RENAME": lambda self: self._parse_alter_table_rename(), - "SET": lambda self: self._parse_alter_table_set(), } def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]: @@ -473,21 +463,19 @@ def _parse_types( def _parse_alter_table_change(self) -> t.Optional[exp.Expression]: self._match(TokenType.COLUMN) - this = self._parse_column() + this = self._parse_field(any_token=True) - if self.dialect.CHANGE_COLUMN_STYLE == "SPARK" and self._match_text_seq("TYPE"): + if self.CHANGE_COLUMN_ALTER_SYNTAX and self._match_text_seq("TYPE"): return self.expression( exp.AlterColumn, this=this, dtype=self._parse_types(schema=True), ) - column_new = self._parse_column() + column_new = 
self._parse_field(any_token=True) dtype = self._parse_types(schema=True) - comment = None - if self._match(TokenType.COMMENT): - comment = self._parse_string() + comment = self._match(TokenType.COMMENT) and self._parse_string() if not this or not column_new or not dtype: self.raise_error( @@ -825,18 +813,16 @@ def altercolumn_sql(self, expression: exp.AlterColumn) -> str: if any([default, drop, visible, allow_null, drop]): self.unsupported("Unsupported CHANGE COLUMN syntax") - if self.dialect.CHANGE_COLUMN_STYLE == "SPARK": - if new_name == this: - if comment: - return f"ALTER COLUMN {this}{comment}" - return super().altercolumn_sql(expression) - return f"RENAME COLUMN {this} TO {new_name}" if not dtype: self.unsupported("CHANGE COLUMN without a type is not supported") return f"CHANGE COLUMN {this} {new_name} {dtype}{comment}" + def renamecolumn_sql(self, expression: exp.RenameColumn) -> str: + self.unsupported("Cannot rename columns without data type defined in Hive") + return "" + def alterset_sql(self, expression: exp.AlterSet) -> str: exprs = self.expressions(expression, flat=True) exprs = f" {exprs}" if exprs else "" @@ -851,14 +837,6 @@ def alterset_sql(self, expression: exp.AlterSet) -> str: return f"SET{serde}{exprs}{location}{file_format}{tags}" - def alter_sql(self, expression: exp.Alter) -> str: - if self.dialect.CHANGE_COLUMN_STYLE == "HIVE": - actions = expression.args["actions"] - for action in actions: - if isinstance(action, exp.RenameColumn): - self.unsupported("Cannot rename columns without data type defined in Hive") - return super().alter_sql(expression) - def serdeproperties_sql(self, expression: exp.SerdeProperties) -> str: prefix = "WITH " if expression.args.get("with") else "" exprs = self.expressions(expression, flat=True) diff --git a/sqlglot/dialects/spark2.py b/sqlglot/dialects/spark2.py index f2ffcf7968..9c4d449f57 100644 --- a/sqlglot/dialects/spark2.py +++ b/sqlglot/dialects/spark2.py @@ -152,7 +152,6 @@ def _annotate_by_similar_args( 
class Spark2(Hive): ALTER_TABLE_SUPPORTS_CASCADE = False - CHANGE_COLUMN_STYLE = "SPARK" ANNOTATORS = { **Hive.ANNOTATORS, @@ -175,6 +174,7 @@ class Tokenizer(Hive.Tokenizer): class Parser(Hive.Parser): TRIM_PATTERN_FIRST = True + CHANGE_COLUMN_ALTER_SYNTAX = True FUNCTIONS = { **Hive.Parser.FUNCTIONS, @@ -237,11 +237,6 @@ class Parser(Hive.Parser): "SHUFFLE_REPLICATE_NL": lambda self: self._parse_join_hint("SHUFFLE_REPLICATE_NL"), } - ALTER_PARSERS = { - **Hive.Parser.ALTER_PARSERS, - "ALTER": lambda self: self._parse_alter_table_alter(), - } - def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: return self._match_text_seq("DROP", "COLUMNS") and self.expression( exp.Drop, this=self._parse_schema(), kind="COLUMNS" @@ -373,3 +368,16 @@ def fileformatproperty_sql(self, expression: exp.FileFormatProperty) -> str: return super().fileformatproperty_sql(expression) return f"USING {expression.name.upper()}" + + def altercolumn_sql(self, expression: exp.AlterColumn) -> str: + this = self.sql(expression, "this") + new_name = self.sql(expression, "rename_to") or this + comment = self.sql(expression, "comment") + if new_name == this: + if comment: + return f"ALTER COLUMN {this} COMMENT {comment}" + return super(Hive.Generator, self).altercolumn_sql(expression) + return f"RENAME COLUMN {this} TO {new_name}" + + def renamecolumn_sql(self, expression: exp.RenameColumn) -> str: + return super(Hive.Generator, self).renamecolumn_sql(expression) diff --git a/tests/dialects/test_hive.py b/tests/dialects/test_hive.py index bad3b99aab..0a31a68179 100644 --- a/tests/dialects/test_hive.py +++ b/tests/dialects/test_hive.py @@ -179,7 +179,7 @@ def test_ddl(self): self.validate_identity("ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)") self.validate_identity( "ALTER TABLE x CHANGE a a VARCHAR(10)", - write_sql="ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)", + "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)", ) self.validate_all( From 
8cdc5375e89b1f61f1ca1dca6a07e7dd1ae847e4 Mon Sep 17 00:00:00 2001 From: Jo <46752250+georgesittas@users.noreply.github.com> Date: Wed, 8 Oct 2025 15:44:46 +0300 Subject: [PATCH 8/8] Update sqlglot/dialects/spark2.py --- sqlglot/dialects/spark2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlglot/dialects/spark2.py b/sqlglot/dialects/spark2.py index 9c4d449f57..c031e78bac 100644 --- a/sqlglot/dialects/spark2.py +++ b/sqlglot/dialects/spark2.py @@ -376,7 +376,7 @@ def altercolumn_sql(self, expression: exp.AlterColumn) -> str: if new_name == this: if comment: return f"ALTER COLUMN {this} COMMENT {comment}" - return super(Hive.Generator, self).altercolumn_sql(expression) + return super().altercolumn_sql(expression) return f"RENAME COLUMN {this} TO {new_name}" def renamecolumn_sql(self, expression: exp.RenameColumn) -> str: