Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions sqlglot/dialects/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,13 @@ class Dialect(metaclass=_Dialect):
equivalent of CREATE SCHEMA is CREATE DATABASE.
"""

ALTER_TABLE_SUPPORTS_CASCADE = False
"""
Hive by default does not update the schema of existing partitions when a column is changed.
the CASCADE clause is used to indicate that the change should be propagated to all existing partitions.
the Spark dialect, while derived from Hive, does not support the CASCADE clause.
"""

# Whether ADD is present for each column added by ALTER TABLE
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

Expand Down
66 changes: 66 additions & 0 deletions sqlglot/dialects/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ class Hive(Dialect):
SAFE_DIVISION = True
ARRAY_AGG_INCLUDES_NULLS = None
REGEXP_EXTRACT_DEFAULT_GROUP = 1
ALTER_TABLE_SUPPORTS_CASCADE = True

# https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
Expand Down Expand Up @@ -310,6 +311,10 @@ class Parser(parser.Parser):
VALUES_FOLLOWED_BY_PAREN = False
JOINS_HAVE_EQUAL_PRECEDENCE = True
ADD_JOIN_ON_TRUE = True
ALTER_TABLE_PARTITIONS = True

# Whether `CHANGE COLUMN <col> TYPE <dtype>` (Spark's ALTER COLUMN form) is
# accepted in place of Hive's `CHANGE COLUMN old new dtype` rename syntax.
CHANGE_COLUMN_ALTER_SYNTAX = False

FUNCTIONS = {
**parser.Parser.FUNCTIONS,
Expand Down Expand Up @@ -378,6 +383,11 @@ class Parser(parser.Parser):
),
}

ALTER_PARSERS = {
**parser.Parser.ALTER_PARSERS,
"CHANGE": lambda self: self._parse_alter_table_change(),
}

def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
if not self._match(TokenType.L_PAREN, advance=False):
self._retreat(self._index - 1)
Expand Down Expand Up @@ -451,6 +461,35 @@ def _parse_types(

return this

def _parse_alter_table_change(self) -> t.Optional[exp.Expression]:
    """Parse the clause following `ALTER TABLE ... CHANGE`.

    Hive's form is `CHANGE [COLUMN] old_name new_name data_type [COMMENT '...']`,
    which is represented as an `exp.AlterColumn` with `rename_to` set. When
    `CHANGE_COLUMN_ALTER_SYNTAX` is enabled (Spark), the alternative form
    `CHANGE [COLUMN] col TYPE data_type` is also accepted and maps to a plain
    type change with no rename.

    Raises a parse error when any of the three mandatory parts of the Hive
    form (old name, new name, data type) is missing.
    """
    # The COLUMN keyword is optional, so consume it if present.
    self._match(TokenType.COLUMN)
    this = self._parse_field(any_token=True)

    if self.CHANGE_COLUMN_ALTER_SYNTAX and self._match_text_seq("TYPE"):
        # Spark-style `CHANGE COLUMN col TYPE dtype`: a pure type change.
        return self.expression(
            exp.AlterColumn,
            this=this,
            dtype=self._parse_types(schema=True),
        )

    column_new = self._parse_field(any_token=True)
    dtype = self._parse_types(schema=True)

    # Optional trailing `COMMENT '<text>'`; `comment` is falsy if absent.
    comment = self._match(TokenType.COMMENT) and self._parse_string()

    if not this or not column_new or not dtype:
        self.raise_error(
            "Expected 'CHANGE COLUMN' to be followed by 'column_name' 'column_name' 'data_type'"
        )

    return self.expression(
        exp.AlterColumn,
        this=this,
        rename_to=column_new,
        dtype=dtype,
        comment=comment,
    )

def _parse_partition_and_order(
self,
) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
Expand Down Expand Up @@ -500,6 +539,7 @@ class Generator(generator.Generator):
PAD_FILL_PATTERN_IS_REQUIRED = True
SUPPORTS_MEDIAN = False
ARRAY_SIZE_NAME = "SIZE"
ALTER_SET_TYPE = ""

EXPRESSIONS_WITHOUT_NESTED_CTES = {
exp.Insert,
Expand Down Expand Up @@ -757,6 +797,32 @@ def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
),
)

def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
    """Render an `exp.AlterColumn` as Hive's `CHANGE COLUMN old new dtype [COMMENT ...]`.

    Hive has no standalone ALTER COLUMN: renames, type changes and comment
    changes all go through CHANGE COLUMN, which always restates the column
    name and requires the data type.
    """
    this = self.sql(expression, "this")
    # Hive repeats the (possibly unchanged) name, so fall back to `this`.
    new_name = self.sql(expression, "rename_to") or this
    dtype = self.sql(expression, "dtype")
    comment_sql = self.sql(expression, "comment")
    comment = f" COMMENT {comment_sql}" if comment_sql else ""
    default = self.sql(expression, "default")
    visible = expression.args.get("visible")
    allow_null = expression.args.get("allow_null")
    drop = expression.args.get("drop")

    # Bug fix: `drop` was listed twice in the original check; each
    # unsupported modifier is now tested exactly once.
    if any((default, drop, visible, allow_null)):
        self.unsupported("Unsupported CHANGE COLUMN syntax")

    if not dtype:
        self.unsupported("CHANGE COLUMN without a type is not supported")

    return f"CHANGE COLUMN {this} {new_name} {dtype}{comment}"

def renamecolumn_sql(self, expression: exp.RenameColumn) -> str:
    """Hive cannot rename a column without restating its type; emit nothing."""
    message = "Cannot rename columns without data type defined in Hive"
    self.unsupported(message)
    return ""

def alterset_sql(self, expression: exp.AlterSet) -> str:
exprs = self.expressions(expression, flat=True)
exprs = f" {exprs}" if exprs else ""
Expand Down
17 changes: 17 additions & 0 deletions sqlglot/dialects/spark2.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ def _annotate_by_similar_args(


class Spark2(Hive):
ALTER_TABLE_SUPPORTS_CASCADE = False

ANNOTATORS = {
**Hive.ANNOTATORS,
exp.Substring: lambda self, e: self._annotate_by_args(e, "this"),
Expand All @@ -172,6 +174,7 @@ class Tokenizer(Hive.Tokenizer):

class Parser(Hive.Parser):
TRIM_PATTERN_FIRST = True
CHANGE_COLUMN_ALTER_SYNTAX = True

FUNCTIONS = {
**Hive.Parser.FUNCTIONS,
Expand Down Expand Up @@ -248,6 +251,7 @@ class Generator(Hive.Generator):
QUERY_HINTS = True
NVL2_SUPPORTED = True
CAN_IMPLEMENT_ARRAY_ANY = True
ALTER_SET_TYPE = "TYPE"

PROPERTIES_LOCATION = {
**Hive.Generator.PROPERTIES_LOCATION,
Expand Down Expand Up @@ -364,3 +368,16 @@ def fileformatproperty_sql(self, expression: exp.FileFormatProperty) -> str:
return super().fileformatproperty_sql(expression)

return f"USING {expression.name.upper()}"

def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
    """Render an `exp.AlterColumn` for Spark, which splits Hive's single
    CHANGE COLUMN statement into dedicated forms.

    - comment change without rename: `ALTER COLUMN col COMMENT '...'`
    - type change without rename: deferred to the base generator
      (`ALTER COLUMN col TYPE dtype`)
    - rename: `RENAME COLUMN old TO new`
    """
    this = self.sql(expression, "this")
    new_name = self.sql(expression, "rename_to") or this
    comment = self.sql(expression, "comment")
    if new_name == this:
        if comment:
            return f"ALTER COLUMN {this} COMMENT {comment}"
        # Bypass Hive.Generator's CHANGE COLUMN override and use the base
        # generator's ALTER COLUMN rendering. NOTE(review): a zero-argument
        # super() was tried and reverted in review, so the explicit
        # two-argument form stays.
        return super(Hive.Generator, self).altercolumn_sql(expression)
    return f"RENAME COLUMN {this} TO {new_name}"

def renamecolumn_sql(self, expression: exp.RenameColumn) -> str:
    # Skip Hive.Generator's override (which flags renames as unsupported and
    # returns "") and use the base generator's RENAME COLUMN rendering,
    # since Spark supports `ALTER TABLE ... RENAME COLUMN old TO new`.
    return super(Hive.Generator, self).renamecolumn_sql(expression)
2 changes: 2 additions & 0 deletions sqlglot/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1838,6 +1838,7 @@ class AlterColumn(Expression):
"comment": False,
"allow_null": False,
"visible": False,
"rename_to": False,
}


Expand Down Expand Up @@ -4956,6 +4957,7 @@ class Alter(Expression):
"cluster": False,
"not_valid": False,
"check": False,
"cascade": False,
}

@property
Expand Down
7 changes: 6 additions & 1 deletion sqlglot/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3621,10 +3621,15 @@ def alter_sql(self, expression: exp.Alter) -> str:
kind = self.sql(expression, "kind")
not_valid = " NOT VALID" if expression.args.get("not_valid") else ""
check = " WITH CHECK" if expression.args.get("check") else ""
cascade = (
" CASCADE"
if expression.args.get("cascade") and self.dialect.ALTER_TABLE_SUPPORTS_CASCADE
else ""
)
this = self.sql(expression, "this")
this = f" {this}" if this else ""

return f"ALTER {kind}{exists}{only}{this}{on_cluster}{check}{self.sep()}{actions_sql}{not_valid}{options}"
return f"ALTER {kind}{exists}{only}{this}{on_cluster}{check}{self.sep()}{actions_sql}{not_valid}{options}{cascade}"

def altersession_sql(self, expression: exp.AlterSession) -> str:
items_sql = self.expressions(expression, flat=True)
Expand Down
7 changes: 6 additions & 1 deletion sqlglot/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1535,6 +1535,9 @@ def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]
# Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
ALTER_RENAME_REQUIRES_COLUMN = True

# Whether Alter statements are allowed to contain Partition specifications
ALTER_TABLE_PARTITIONS = False

# Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
# In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
# to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
Expand Down Expand Up @@ -7716,7 +7719,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
check = None
cluster = None
else:
this = self._parse_table(schema=True)
this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS)
check = self._match_text_seq("WITH", "CHECK")
cluster = self._parse_on_property() if self._match(TokenType.ON) else None

Expand All @@ -7728,6 +7731,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
actions = ensure_list(parser(self))
not_valid = self._match_text_seq("NOT", "VALID")
options = self._parse_csv(self._parse_property)
cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")

if not self._curr and actions:
return self.expression(
Expand All @@ -7741,6 +7745,7 @@ def _parse_alter(self) -> exp.Alter | exp.Command:
cluster=cluster,
not_valid=not_valid,
check=check,
cascade=cascade,
)

return self._parse_as_command(start)
Expand Down
38 changes: 38 additions & 0 deletions tests/dialects/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,44 @@ def test_ddl(self):
},
)

self.validate_identity("ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)")
self.validate_identity(
"ALTER TABLE x CHANGE a a VARCHAR(10)",
"ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
)

self.validate_all(
"ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
write={
"hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
"spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)",
},
)
self.validate_all(
"ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'",
write={
"hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'",
"spark": "ALTER TABLE x ALTER COLUMN a COMMENT 'comment'",
},
)
self.validate_all(
"ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)",
write={
"hive": "ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)",
"spark": "ALTER TABLE x RENAME COLUMN a TO b",
},
)
self.validate_all(
"ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE",
write={
"hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE",
"spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)",
},
)

self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING)")
self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING) CASCADE")

self.validate_identity(
"""CREATE EXTERNAL TABLE x (y INT) ROW FORMAT SERDE 'serde' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' WITH SERDEPROPERTIES ('input.regex'='')""",
)
Expand Down
22 changes: 22 additions & 0 deletions tests/dialects/test_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from sqlglot import exp, parse_one
from sqlglot.dialects.dialect import Dialects
from sqlglot.errors import UnsupportedError
from tests.dialects.test_dialect import Validator


Expand Down Expand Up @@ -132,6 +133,27 @@ def test_ddl(self):
"spark": "ALTER TABLE StudentInfo ADD COLUMNS (LastName STRING, DOB TIMESTAMP)",
},
)
self.validate_all(
"ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
write={
"spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
"hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
},
)
self.validate_all(
"ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
write={
"spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT",
"hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT",
},
)
self.validate_all(
"ALTER TABLE db.example RENAME COLUMN col_a TO col_b",
write={
"spark": "ALTER TABLE db.example RENAME COLUMN col_a TO col_b",
"hive": UnsupportedError,
},
)
self.validate_all(
"ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)",
write={
Expand Down