Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sqlglot/dialects/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,8 @@ class Parser(parser.Parser):
"FROM_HEX": exp.Unhex.from_arg_list,
"WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
}
# Remove SEARCH to avoid parameter routing issues - let it fall back to Anonymous function
FUNCTIONS.pop("SEARCH")

FUNCTION_PARSERS = {
**parser.Parser.FUNCTION_PARSERS,
Expand Down
35 changes: 35 additions & 0 deletions sqlglot/dialects/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,33 @@ def _build_if_from_zeroifnull(args: t.List) -> exp.If:
return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


def _build_search(args: t.List) -> exp.Search:
    """Build an `exp.Search` node from the argument list of a SEARCH(...) call.

    The first two arguments become `this` and `expression`. The third and fourth
    arguments are only honoured when they are `exp.Kwarg` nodes whose name is
    `analyzer` or `search_mode` (compared case-insensitively); they may come in
    either order. The whole Kwarg node is stored under the matching key, and
    anything else found in those two positions is ignored.
    """
    # Insertion order matters: it fixes the order of the keyword arguments
    # handed to exp.Search below (analyzer first, then search_mode).
    named_options = {"analyzer": None, "search_mode": None}

    # Only positions 2 and 3 are inspected; a later match overrides an earlier one.
    for candidate in args[2:4]:
        if not (candidate and isinstance(candidate, exp.Kwarg)):
            continue

        option = candidate.this.name.lower()
        if option in named_options:
            named_options[option] = candidate

    return exp.Search(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        **named_options,
    )


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
Expand Down Expand Up @@ -608,6 +635,13 @@ class Snowflake(Dialect):
exp.ParseUrl,
exp.ParseIp,
},
exp.DataType.Type.DECIMAL: {
exp.RegexpCount,
},
exp.DataType.Type.BOOLEAN: {
*Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BOOLEAN],
exp.Search,
},
}

ANNOTATORS = {
Expand Down Expand Up @@ -819,6 +853,7 @@ class Parser(parser.Parser):
"ZEROIFNULL": _build_if_from_zeroifnull,
"LIKE": _build_like(exp.Like),
"ILIKE": _build_like(exp.ILike),
"SEARCH": _build_search,
}
FUNCTIONS.pop("PREDICT")

Expand Down
13 changes: 13 additions & 0 deletions sqlglot/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7437,6 +7437,19 @@ class StrPosition(Func):
}


# Snowflake: https://docs.snowflake.com/en/sql-reference/functions/search
# BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/search_functions#search
class Search(Func):
    """Expression node for the SEARCH function of Snowflake and BigQuery.

    `this` is the data being searched (data_to_search / search_data) and
    `expression` is the search query (search_query / search_string). The
    remaining entries are optional named arguments, some of which exist in
    only one of the two dialects.
    """

    arg_types = {
        # Positional arguments.
        "this": True,
        "expression": True,
        # BigQuery only: JSON_VALUES | JSON_KEYS | JSON_KEYS_AND_VALUES
        "json_scope": False,
        # Both dialects: analyzer / ANALYZER
        "analyzer": False,
        # BigQuery only: analyzer_options_values
        "analyzer_options": False,
        # Snowflake only: OR | AND
        "search_mode": False,
    }


class StrToDate(Func):
arg_types = {"this": True, "format": False, "safe": False}

Expand Down
18 changes: 18 additions & 0 deletions tests/dialects/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,24 @@ def test_bigquery(self):
self.validate_identity("PARSE_JSON('{}', wide_number_mode => 'exact')")
self.validate_identity("FOO(values)")
self.validate_identity("STRUCT(values AS value)")

self.validate_identity("SELECT SEARCH(data_to_search, 'search_query')")
self.validate_identity(
"SELECT SEARCH(data_to_search, 'search_query', json_scope => 'JSON_KEYS_AND_VALUES')"
)
self.validate_identity(
"SELECT SEARCH(data_to_search, 'search_query', analyzer => 'PATTERN_ANALYZER')"
)
self.validate_identity(
"SELECT SEARCH(data_to_search, 'search_query', analyzer_options => 'analyzer_options_values')"
)
self.validate_identity(
"SELECT SEARCH(data_to_search, 'search_query', json_scope => 'JSON_VALUES', analyzer => 'LOG_ANALYZER')"
)
self.validate_identity(
"SELECT SEARCH(data_to_search, 'search_query', analyzer => 'PATTERN_ANALYZER', analyzer_options => 'options')"
)

self.validate_identity("ARRAY_AGG(x IGNORE NULLS LIMIT 1)")
self.validate_identity("ARRAY_AGG(x IGNORE NULLS ORDER BY x LIMIT 1)")
self.validate_identity("ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY x LIMIT 1)")
Expand Down
38 changes: 37 additions & 1 deletion tests/dialects/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -2438,7 +2438,43 @@ def test_regexp_substr(self, logger):
"REGEXP_EXTRACT_ALL(subject, pattern)",
)

self.validate_identity("SELECT REGEXP_COUNT('hello world', 'l')")
self.validate_identity("SELECT SEARCH((play, line), 'dream')")
self.validate_identity("SELECT SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER')")
self.validate_identity("SELECT SEARCH(character, 'king queen', SEARCH_MODE => 'AND')")
self.validate_identity(
"SELECT SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER', SEARCH_MODE => 'OR')"
)

# AST validation tests - verify argument mapping
ast = self.validate_identity("SELECT SEARCH(line, 'king')")
search_ast = ast.find(exp.Search)
self.assertEqual(list(search_ast.args), ["this", "expression", "analyzer", "search_mode"])
self.assertIsNone(search_ast.args.get("analyzer"))
self.assertIsNone(search_ast.args.get("search_mode"))

ast = self.validate_identity("SELECT SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER')")
search_ast = ast.find(exp.Search)
self.assertIsNotNone(search_ast.args.get("analyzer"))
self.assertIsNone(search_ast.args.get("search_mode"))

ast = self.validate_identity("SELECT SEARCH(character, 'king queen', SEARCH_MODE => 'AND')")
search_ast = ast.find(exp.Search)
self.assertIsNone(search_ast.args.get("analyzer"))
self.assertIsNotNone(search_ast.args.get("search_mode"))

# Test with arguments in different order (search_mode first, then analyzer)
ast = self.validate_identity(
"SELECT SEARCH(line, 'king', SEARCH_MODE => 'AND', ANALYZER => 'PATTERN_ANALYZER')",
"SELECT SEARCH(line, 'king', ANALYZER => 'PATTERN_ANALYZER', SEARCH_MODE => 'AND')",
)
search_ast = ast.find(exp.Search)
self.assertEqual(list(search_ast.args), ["this", "expression", "analyzer", "search_mode"])
analyzer = search_ast.args.get("analyzer")
self.assertIsNotNone(analyzer)
search_mode = search_ast.args.get("search_mode")
self.assertIsNotNone(search_mode)

self.validate_identity("SELECT REGEXP_COUNT('hello world', 'l ')")
self.validate_identity("SELECT REGEXP_COUNT('hello world', 'l', 1)")
self.validate_identity("SELECT REGEXP_COUNT('hello world', 'l', 1, 'i')")

Expand Down
20 changes: 20 additions & 0 deletions tests/fixtures/optimizer/annotate_functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2199,6 +2199,26 @@ BOOLEAN;
STARTSWITH(tbl.bin_col, NULL);
BOOLEAN;

# dialect: snowflake
SEARCH(line, 'king');
BOOLEAN;

# dialect: snowflake
SEARCH((play, line), 'dream');
BOOLEAN;

# dialect: snowflake
SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER');
BOOLEAN;

# dialect: snowflake
SEARCH(line, 'king', SEARCH_MODE => 'OR');
BOOLEAN;

# dialect: snowflake
SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER', SEARCH_MODE => 'AND');
BOOLEAN;

# dialect: snowflake
STRTOK_TO_ARRAY('a,b,c', ',');
ARRAY;
Expand Down