def _build_search(args: t.List) -> exp.Search:
    """Build an `exp.Search` node from the arguments of a SEARCH(...) call.

    The first two positional arguments become `this` and `expression`. The
    optional third and fourth arguments are inspected: when one of them is an
    `exp.Kwarg` whose name is ANALYZER or SEARCH_MODE (compared
    case-insensitively), the whole Kwarg node is stored under the matching
    key, so the two named arguments may be supplied in either order. Any other
    value in those positions is ignored.

    Args:
        args: The parsed argument list of the function call.

    Returns:
        An `exp.Search` with `this`, `expression`, `analyzer` and
        `search_mode` set; the last two are None when not supplied.
    """
    # Both keys are always handed to exp.Search, even when left as None.
    routed = {"analyzer": None, "search_mode": None}

    # Only positions 2 and 3 can carry the named arguments; slicing tolerates
    # shorter argument lists without any explicit bounds checks.
    for arg in args[2:4]:
        if not isinstance(arg, exp.Kwarg):
            continue

        key = arg.this.name.lower()
        if key in routed:
            # A later occurrence of the same name overrides an earlier one.
            routed[key] = arg

    return exp.Search(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        analyzer=routed["analyzer"],
        search_mode=routed["search_mode"],
    )
# Snowflake: https://docs.snowflake.com/en/sql-reference/functions/search
# BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/search_functions#search
class Search(Func):
    # Key order is kept exactly as declared.
    arg_types = {
        # data_to_search / search_data
        "this": True,
        # search_query / search_string
        "expression": True,
        # BigQuery: JSON_VALUES | JSON_KEYS | JSON_KEYS_AND_VALUES
        "json_scope": False,
        # Both: analyzer / ANALYZER
        "analyzer": False,
        # BigQuery: analyzer_options_values
        "analyzer_options": False,
        # Snowflake: OR | AND
        "search_mode": False,
    }
self.validate_identity( + "SELECT SEARCH(data_to_search, 'search_query', analyzer => 'PATTERN_ANALYZER', analyzer_options => 'options')" + ) + self.validate_identity("ARRAY_AGG(x IGNORE NULLS LIMIT 1)") self.validate_identity("ARRAY_AGG(x IGNORE NULLS ORDER BY x LIMIT 1)") self.validate_identity("ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY x LIMIT 1)") diff --git a/tests/dialects/test_snowflake.py b/tests/dialects/test_snowflake.py index 4c0e2d0f5f..447626526b 100644 --- a/tests/dialects/test_snowflake.py +++ b/tests/dialects/test_snowflake.py @@ -2438,7 +2438,43 @@ def test_regexp_substr(self, logger): "REGEXP_EXTRACT_ALL(subject, pattern)", ) - self.validate_identity("SELECT REGEXP_COUNT('hello world', 'l')") + self.validate_identity("SELECT SEARCH((play, line), 'dream')") + self.validate_identity("SELECT SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER')") + self.validate_identity("SELECT SEARCH(character, 'king queen', SEARCH_MODE => 'AND')") + self.validate_identity( + "SELECT SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER', SEARCH_MODE => 'OR')" + ) + + # AST validation tests - verify argument mapping + ast = self.validate_identity("SELECT SEARCH(line, 'king')") + search_ast = ast.find(exp.Search) + self.assertEqual(list(search_ast.args), ["this", "expression", "analyzer", "search_mode"]) + self.assertIsNone(search_ast.args.get("analyzer")) + self.assertIsNone(search_ast.args.get("search_mode")) + + ast = self.validate_identity("SELECT SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER')") + search_ast = ast.find(exp.Search) + self.assertIsNotNone(search_ast.args.get("analyzer")) + self.assertIsNone(search_ast.args.get("search_mode")) + + ast = self.validate_identity("SELECT SEARCH(character, 'king queen', SEARCH_MODE => 'AND')") + search_ast = ast.find(exp.Search) + self.assertIsNone(search_ast.args.get("analyzer")) + self.assertIsNotNone(search_ast.args.get("search_mode")) + + # Test with arguments in different order (search_mode first, then analyzer) 
+ ast = self.validate_identity( + "SELECT SEARCH(line, 'king', SEARCH_MODE => 'AND', ANALYZER => 'PATTERN_ANALYZER')", + "SELECT SEARCH(line, 'king', ANALYZER => 'PATTERN_ANALYZER', SEARCH_MODE => 'AND')", + ) + search_ast = ast.find(exp.Search) + self.assertEqual(list(search_ast.args), ["this", "expression", "analyzer", "search_mode"]) + analyzer = search_ast.args.get("analyzer") + self.assertIsNotNone(analyzer) + search_mode = search_ast.args.get("search_mode") + self.assertIsNotNone(search_mode) + + self.validate_identity("SELECT REGEXP_COUNT('hello world', 'l ')") self.validate_identity("SELECT REGEXP_COUNT('hello world', 'l', 1)") self.validate_identity("SELECT REGEXP_COUNT('hello world', 'l', 1, 'i')") diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql index 39a688b379..1506b1ad52 100644 --- a/tests/fixtures/optimizer/annotate_functions.sql +++ b/tests/fixtures/optimizer/annotate_functions.sql @@ -2199,6 +2199,26 @@ BOOLEAN; STARTSWITH(tbl.bin_col, NULL); BOOLEAN; +# dialect: snowflake +SEARCH(line, 'king'); +BOOLEAN; + +# dialect: snowflake +SEARCH((play, line), 'dream'); +BOOLEAN; + +# dialect: snowflake +SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER'); +BOOLEAN; + +# dialect: snowflake +SEARCH(line, 'king', SEARCH_MODE => 'OR'); +BOOLEAN; + +# dialect: snowflake +SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER', SEARCH_MODE => 'AND'); +BOOLEAN; + # dialect: snowflake STRTOK_TO_ARRAY('a,b,c', ','); ARRAY;