@@ -75,17 +75,22 @@ def _is_metadata_var(
75
75
) -> t .Optional [bool ]:
76
76
is_metadata_so_far = used_variables .get (name , True )
77
77
if is_metadata_so_far is False :
78
+ # We've concluded this variable is definitely not metadata-only
78
79
return False
79
80
80
81
appears_under_metadata_macro_func = expr_under_metadata_macro_func .get (id (expression ))
81
82
if is_metadata_so_far and (
82
83
appears_in_metadata_expression or appears_under_metadata_macro_func
83
84
):
85
+ # The variable appears in a metadata expression, e.g., audits (...),
86
+ # or in the AST of a metadata-only macro call, e.g., @FOO(@x)
84
87
return True
85
88
89
+ # The variable appears in the AST of a macro call, but we don't know if it's metadata-only
86
90
if appears_under_metadata_macro_func is False :
87
91
return None
88
92
93
+ # The variable appears elsewhere, e.g., in the model's query: SELECT @x
89
94
return False
90
95
91
96
def _is_metadata_macro (name : str , appears_in_metadata_expression : bool ) -> bool :
@@ -131,6 +136,14 @@ def _is_metadata_macro(name: str, appears_in_metadata_expression: bool) -> bool:
131
136
var_name , macro_func_or_var , is_metadata
132
137
)
133
138
elif id (macro_func_or_var ) not in visited_macro_funcs :
139
+ # We only care about the top-level macro function calls to determine the metadata
140
+ # status of the variables referenced in their ASTs. For example, in @m1(@m2(@x)),
141
+ # if m1 is metadata-only but m2 is not, we can still determine that @x only affects
142
+ # the metadata hash, since m2's result feeds into a metadata-only macro function.
143
+ #
144
+ # Generally, if the top-level call is known to be metadata-only or appear in a
145
+ # metadata expression, then we can avoid traversing nested macro function calls.
146
+
134
147
var_refs , _expr_under_metadata_macro_func , _visited_macro_funcs = (
135
148
_extract_macro_func_variable_references (macro_func_or_var , is_metadata )
136
149
)
@@ -192,7 +205,7 @@ def _extract_macro_func_variable_references(
192
205
macro_func : exp .Expression ,
193
206
is_metadata : bool ,
194
207
) -> t .Tuple [t .Set [str ], t .Dict [int , bool ], t .Set [int ]]:
195
- references = set ()
208
+ var_references = set ()
196
209
visited_macro_funcs = set ()
197
210
expr_under_metadata_macro_func = {}
198
211
@@ -204,19 +217,19 @@ def _extract_macro_func_variable_references(
204
217
args = this .expressions
205
218
206
219
if this .name .lower () in (c .VAR , c .BLUEPRINT_VAR ) and args and args [0 ].is_string :
207
- references .add (args [0 ].this .lower ())
220
+ var_references .add (args [0 ].this .lower ())
208
221
expr_under_metadata_macro_func [id (n )] = is_metadata
209
222
elif isinstance (n , d .MacroVar ):
210
- references .add (n .name .lower ())
223
+ var_references .add (n .name .lower ())
211
224
expr_under_metadata_macro_func [id (n )] = is_metadata
212
225
elif isinstance (n , (exp .Identifier , d .MacroStrReplace , d .MacroSQL )) and "@" in n .name :
213
- references .update (
226
+ var_references .update (
214
227
(braced_identifier or identifier ).lower ()
215
228
for _ , identifier , braced_identifier , _ in MacroStrTemplate .pattern .findall (n .name )
216
229
)
217
230
expr_under_metadata_macro_func [id (n )] = is_metadata
218
231
219
- return (references , expr_under_metadata_macro_func , visited_macro_funcs )
232
+ return (var_references , expr_under_metadata_macro_func , visited_macro_funcs )
220
233
221
234
222
235
def _add_variables_to_python_env (
@@ -238,16 +251,22 @@ def _add_variables_to_python_env(
238
251
for var_name , is_metadata in python_used_variables .items ():
239
252
used_variables [var_name ] = is_metadata and used_variables .get (var_name , True )
240
253
241
- # Variables are treated as metadata when:
242
- # - They are only referenced in metadata-only contexts, such as `audits (...)`, virtual statements, etc
243
- # - They are only referenced in metadata-only macros, either as their arguments or within their definitions
254
+ # Variables are treated as metadata-only when all of their references either:
255
+ # - appear in metadata-only expressions, such as `audits (...)`, virtual statements, etc
256
+ # - appear in the ASTs or definitions of metadata-only macros
257
+ #
258
+ # See also: https://github.com/TobikoData/sqlmesh/pull/4936#issuecomment-3136339936,
259
+ # specifically the "Terminology" and "Observations" sections.
244
260
metadata_used_variables = {
245
261
var_name for var_name , is_metadata in used_variables .items () if is_metadata
246
262
}
247
263
for used_var , outermost_macro_func in (outermost_macro_func_ancestor_by_var or {}).items ():
248
264
used_var_is_metadata = used_variables .get (used_var )
249
265
if used_var_is_metadata is False :
250
266
continue
267
+
268
+ # At this point we can decide whether a variable reference in a macro call's AST is
269
+ # metadata-only, because we've annotated the corresponding macro call in the python env.
251
270
if outermost_macro_func in python_env and python_env [outermost_macro_func ].is_metadata :
252
271
metadata_used_variables .add (used_var )
253
272
0 commit comments