Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions sqlglot/dialects/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,13 @@ class Dialect(metaclass=_Dialect):
equivalent of CREATE SCHEMA is CREATE DATABASE.
"""

ALTER_TABLE_SUPPORTS_CASCADE = False
"""
Hive by default does not update the schema of existing partitions when a column is changed.
The CASCADE clause is used to indicate that the change should be propagated to all existing partitions.
The Spark dialect, while derived from Hive, does not support the CASCADE clause.
"""

# Whether ADD is present for each column added by ALTER TABLE
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

Expand Down
88 changes: 88 additions & 0 deletions sqlglot/dialects/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,14 @@ class Hive(Dialect):
SAFE_DIVISION = True
ARRAY_AGG_INCLUDES_NULLS = None
REGEXP_EXTRACT_DEFAULT_GROUP = 1
ALTER_TABLE_SUPPORTS_CASCADE = True
CHANGE_COLUMN_STYLE = "HIVE"
"""
Spark and its derivatives support both the Hive style CHANGE COLUMN syntax and more traditional ALTER/RENAME COLUMN syntax. Spark also
accepts ALTER COLUMN syntax when using the CHANGE COLUMN keyword but not vice versa. The Parser needs to be able to handle both styles
for Spark dialects and the Generator will use ALTER COLUMN syntax when possible. This also means that there are circumstances where it's
not possible to transpile commands from Spark to Hive due to missing information (e.g. missing data types).
"""

# https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
Expand Down Expand Up @@ -310,6 +318,7 @@ class Parser(parser.Parser):
VALUES_FOLLOWED_BY_PAREN = False
JOINS_HAVE_EQUAL_PRECEDENCE = True
ADD_JOIN_ON_TRUE = True
ALTER_TABLE_PARTITIONS = True

FUNCTIONS = {
**parser.Parser.FUNCTIONS,
Expand Down Expand Up @@ -378,6 +387,17 @@ class Parser(parser.Parser):
),
}

ALTER_PARSERS = {
"ADD": lambda self: self._parse_alter_table_add(),
"AS": lambda self: self._parse_select(),
"CHANGE": lambda self: self._parse_alter_table_change(),
"CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
"DROP": lambda self: self._parse_alter_table_drop(),
"PARTITION": lambda self: self._parse_alter_table_partition(),
"RENAME": lambda self: self._parse_alter_table_rename(),
"SET": lambda self: self._parse_alter_table_set(),
}

def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
if not self._match(TokenType.L_PAREN, advance=False):
self._retreat(self._index - 1)
Expand Down Expand Up @@ -451,6 +471,37 @@ def _parse_types(

return this

def _parse_alter_table_change(self) -> t.Optional[exp.Expression]:
    """Parse the clause following ALTER TABLE ... CHANGE [COLUMN].

    Hive style:  CHANGE [COLUMN] old_name new_name data_type [COMMENT 'text']
    Spark style: additionally accepts CHANGE [COLUMN] col TYPE data_type,
                 mirroring Spark's ALTER COLUMN syntax.

    Returns an exp.AlterColumn, or raises a parse error when the mandatory
    column names / data type are missing.
    """
    # The COLUMN keyword is optional in both Hive and Spark.
    self._match(TokenType.COLUMN)
    this = self._parse_column()

    if self.dialect.CHANGE_COLUMN_STYLE == "SPARK" and self._match_text_seq("TYPE"):
        dtype = self._parse_types(schema=True)

        # Spark's "CHANGE col TYPE dtype" still requires both the column
        # and the new data type; fail loudly instead of producing a
        # half-formed expression (the Hive-style path below already does).
        if not this or not dtype:
            self.raise_error("Expected 'CHANGE COLUMN' to be followed by 'column_name' TYPE 'data_type'")

        return self.expression(
            exp.AlterColumn,
            this=this,
            dtype=dtype,
        )

    column_new = self._parse_column()
    dtype = self._parse_types(schema=True)

    comment = None
    if self._match(TokenType.COMMENT):
        comment = self._parse_string()

    if not this or not column_new or not dtype:
        self.raise_error(
            "Expected 'CHANGE COLUMN' to be followed by 'column_name' 'column_name' 'data_type'"
        )

    return self.expression(
        exp.AlterColumn,
        this=this,
        rename_to=column_new,
        dtype=dtype,
        comment=comment,
    )

def _parse_partition_and_order(
self,
) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
Expand Down Expand Up @@ -500,6 +551,7 @@ class Generator(generator.Generator):
PAD_FILL_PATTERN_IS_REQUIRED = True
SUPPORTS_MEDIAN = False
ARRAY_SIZE_NAME = "SIZE"
ALTER_SET_TYPE = ""

EXPRESSIONS_WITHOUT_NESTED_CTES = {
exp.Insert,
Expand Down Expand Up @@ -757,6 +809,34 @@ def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
),
)

def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
    """Render an exp.AlterColumn for Hive-family dialects.

    Hive emits the legacy "CHANGE COLUMN old new dtype [COMMENT ...]" form;
    Spark-style dialects (CHANGE_COLUMN_STYLE == "SPARK") prefer the modern
    ALTER COLUMN / RENAME COLUMN forms where possible.
    """
    this = self.sql(expression, "this")
    # Hive's CHANGE COLUMN always repeats a column name, so a plain type or
    # comment change re-uses the current name.
    new_name = self.sql(expression, "rename_to") or this
    dtype = self.sql(expression, "dtype")
    comment_sql = self.sql(expression, "comment")
    comment = f" COMMENT {comment_sql}" if comment_sql else ""
    default = self.sql(expression, "default")
    visible = expression.args.get("visible")
    allow_null = expression.args.get("allow_null")
    drop = expression.args.get("drop")

    # These AlterColumn features have no CHANGE COLUMN equivalent; warn but
    # keep generating (generator convention for unsupported syntax).
    # NOTE: the original listed `drop` twice in this check — deduplicated.
    if any([default, drop, visible, allow_null]):
        self.unsupported("Unsupported CHANGE COLUMN syntax")

    if self.dialect.CHANGE_COLUMN_STYLE == "SPARK":
        if new_name == this:
            # Spark cannot combine a comment with other changes in one
            # ALTER COLUMN; a comment takes precedence here.
            if comment:
                return f"ALTER COLUMN {this}{comment}"
            return super().altercolumn_sql(expression)
        return f"RENAME COLUMN {this} TO {new_name}"

    # Hive requires the data type even when only renaming/commenting.
    if not dtype:
        self.unsupported("CHANGE COLUMN without a type is not supported")

    return f"CHANGE COLUMN {this} {new_name} {dtype}{comment}"

def alterset_sql(self, expression: exp.AlterSet) -> str:
exprs = self.expressions(expression, flat=True)
exprs = f" {exprs}" if exprs else ""
Expand All @@ -771,6 +851,14 @@ def alterset_sql(self, expression: exp.AlterSet) -> str:

return f"SET{serde}{exprs}{location}{file_format}{tags}"

def alter_sql(self, expression: exp.Alter) -> str:
    """Render an exp.Alter, flagging renames that Hive cannot express.

    Hive's only rename mechanism is CHANGE COLUMN, which requires the
    column's data type; a bare exp.RenameColumn carries no type, so it is
    reported as unsupported for Hive-style dialects. Generation then
    proceeds via the base implementation for every dialect — the return
    must NOT be nested inside the HIVE branch, or Spark-derived dialects
    would get None back.
    """
    if self.dialect.CHANGE_COLUMN_STYLE == "HIVE":
        for action in expression.args.get("actions") or []:
            if isinstance(action, exp.RenameColumn):
                self.unsupported("Cannot rename columns without data type defined in Hive")
    return super().alter_sql(expression)

def serdeproperties_sql(self, expression: exp.SerdeProperties) -> str:
prefix = "WITH " if expression.args.get("with") else ""
exprs = self.expressions(expression, flat=True)
Expand Down
9 changes: 9 additions & 0 deletions sqlglot/dialects/spark2.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,9 @@ def _annotate_by_similar_args(


class Spark2(Hive):
ALTER_TABLE_SUPPORTS_CASCADE = False
CHANGE_COLUMN_STYLE = "SPARK"

ANNOTATORS = {
**Hive.ANNOTATORS,
exp.Substring: lambda self, e: self._annotate_by_args(e, "this"),
Expand Down Expand Up @@ -234,6 +237,11 @@ class Parser(Hive.Parser):
"SHUFFLE_REPLICATE_NL": lambda self: self._parse_join_hint("SHUFFLE_REPLICATE_NL"),
}

ALTER_PARSERS = {
**Hive.Parser.ALTER_PARSERS,
"ALTER": lambda self: self._parse_alter_table_alter(),
}

def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
return self._match_text_seq("DROP", "COLUMNS") and self.expression(
exp.Drop, this=self._parse_schema(), kind="COLUMNS"
Expand All @@ -248,6 +256,7 @@ class Generator(Hive.Generator):
QUERY_HINTS = True
NVL2_SUPPORTED = True
CAN_IMPLEMENT_ARRAY_ANY = True
ALTER_SET_TYPE = "TYPE"

PROPERTIES_LOCATION = {
**Hive.Generator.PROPERTIES_LOCATION,
Expand Down
2 changes: 2 additions & 0 deletions sqlglot/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1838,6 +1838,7 @@ class AlterColumn(Expression):
"comment": False,
"allow_null": False,
"visible": False,
"rename_to": False,
}


Expand Down Expand Up @@ -4956,6 +4957,7 @@ class Alter(Expression):
"cluster": False,
"not_valid": False,
"check": False,
"cascade": False,
}

@property
Expand Down
7 changes: 6 additions & 1 deletion sqlglot/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3621,10 +3621,15 @@ def alter_sql(self, expression: exp.Alter) -> str:
kind = self.sql(expression, "kind")
not_valid = " NOT VALID" if expression.args.get("not_valid") else ""
check = " WITH CHECK" if expression.args.get("check") else ""
cascade = (
" CASCADE"
if expression.args.get("cascade") and self.dialect.ALTER_TABLE_SUPPORTS_CASCADE
else ""
)
this = self.sql(expression, "this")
this = f" {this}" if this else ""

return f"ALTER {kind}{exists}{only}{this}{on_cluster}{check}{self.sep()}{actions_sql}{not_valid}{options}"
return f"ALTER {kind}{exists}{only}{this}{on_cluster}{check}{self.sep()}{actions_sql}{not_valid}{options}{cascade}"

def altersession_sql(self, expression: exp.AlterSession) -> str:
items_sql = self.expressions(expression, flat=True)
Expand Down
7 changes: 6 additions & 1 deletion sqlglot/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1535,6 +1535,9 @@ def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]
# Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
ALTER_RENAME_REQUIRES_COLUMN = True

# Whether Alter statements are allowed to contain Partition specifications
ALTER_TABLE_PARTITIONS = False

# Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
# In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
# to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
Expand Down Expand Up @@ -7716,7 +7719,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
check = None
cluster = None
else:
this = self._parse_table(schema=True)
this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS)
check = self._match_text_seq("WITH", "CHECK")
cluster = self._parse_on_property() if self._match(TokenType.ON) else None

Expand All @@ -7728,6 +7731,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
actions = ensure_list(parser(self))
not_valid = self._match_text_seq("NOT", "VALID")
options = self._parse_csv(self._parse_property)
cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")

if not self._curr and actions:
return self.expression(
Expand All @@ -7741,6 +7745,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
cluster=cluster,
not_valid=not_valid,
check=check,
cascade=cascade,
)

return self._parse_as_command(start)
Expand Down
38 changes: 38 additions & 0 deletions tests/dialects/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,44 @@ def test_ddl(self):
},
)

self.validate_identity("ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)")
self.validate_identity(
"ALTER TABLE x CHANGE a a VARCHAR(10)",
write_sql="ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
)

self.validate_all(
"ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
write={
"hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
"spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)",
},
)
self.validate_all(
"ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'",
write={
"hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'",
"spark": "ALTER TABLE x ALTER COLUMN a COMMENT 'comment'",
},
)
self.validate_all(
"ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)",
write={
"hive": "ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)",
"spark": "ALTER TABLE x RENAME COLUMN a TO b",
},
)
self.validate_all(
"ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE",
write={
"hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE",
"spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)",
},
)

self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING)")
self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING) CASCADE")

self.validate_identity(
"""CREATE EXTERNAL TABLE x (y INT) ROW FORMAT SERDE 'serde' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' WITH SERDEPROPERTIES ('input.regex'='')""",
)
Expand Down
22 changes: 22 additions & 0 deletions tests/dialects/test_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from sqlglot import exp, parse_one
from sqlglot.dialects.dialect import Dialects
from sqlglot.errors import UnsupportedError
from tests.dialects.test_dialect import Validator


Expand Down Expand Up @@ -132,6 +133,27 @@ def test_ddl(self):
"spark": "ALTER TABLE StudentInfo ADD COLUMNS (LastName STRING, DOB TIMESTAMP)",
},
)
self.validate_all(
"ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
write={
"spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
"hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
},
)
self.validate_all(
"ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
write={
"spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
"hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
},
)
self.validate_all(
"ALTER TABLE db.example RENAME COLUMN col_a TO col_b",
write={
"spark": "ALTER TABLE db.example RENAME COLUMN col_a TO col_b",
"hive": UnsupportedError,
},
)
self.validate_all(
"ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)",
write={
Expand Down