|
| 1 | +""" |
| 2 | +This script's goal is to warn users about two situations that could lead to a diff: |
| 3 | +
|
| 4 | +- They have blueprint models and some of their variables may be trimmed from `python_env` |
| 5 | +- Variables are used in metadata-only contexts, e.g., within metadata-only macros |
| 6 | +
|
| 7 | +Context: |
| 8 | +
|
| 9 | +We used to store *all* blueprint variables in `python_env`, even though some of them were |
| 10 | +redundant. For example, if a blueprint variable is only used in the model's `name` property, |
| 11 | +then it is rendered once, at load time, and after that point it's not needed elsewhere. |
| 12 | +
|
| 13 | +This behavior is now different: we only store the blueprint variables that are required to render |
| 14 | +expressions at runtime, such as model query or runtime-rendered properties, like `merge_filter`. |
| 15 | +
|
| 16 | +Additionally, variables were previously treated as non-metadata, regardless of how they were used. |
| 17 | +This behavior changed as well: SQLMesh now analyzes variable references and tracks the data flow, |
| 18 | +in order to detect whether changing them will result in a metadata diff for a given model. |
| 19 | +
|
| 20 | +Some examples where variables can be treated as metadata-only `python_env` executables are: |
| 21 | +
|
| 22 | +- A variable is referenced in metadata-only macros |
| 23 | +- A variable is referenced in metadata-only expressions, such as virtual update statements |
| 24 | +- A variable is passed as argument to metadata-only macros |
| 25 | +""" |
| 26 | + |
| 27 | +import json |
| 28 | + |
| 29 | +from sqlglot import exp |
| 30 | + |
| 31 | +from sqlmesh.core.console import get_console |
| 32 | + |
| 33 | +SQLMESH_BLUEPRINT_VARS = "__sqlmesh__blueprint__vars__" |
| 34 | +METADATA_HASH_EXPRESSIONS = {"on_virtual_update", "audits", "signals", "audit_definitions"} |
| 35 | + |
| 36 | + |
def migrate(state_sync, **kwargs):  # type: ignore
    """Warn once if any stored snapshot may show an unexpected diff after migrating.

    Every row of the `_snapshots` table (prefixed with `state_sync.schema` when
    one is set) is fetched and its JSON payload inspected. Nodes whose
    `source_type` is "audit" are skipped. For any other node, a console warning
    is logged and the function returns immediately when one of the following
    holds, checked in this order:

    - the `python_env` contains the SQLMESH_BLUEPRINT_VARS key
    - some `python_env` entry has a truthy `is_metadata` value
    - the node has a truthy value for any key in METADATA_HASH_EXPRESSIONS

    This function only reads from the snapshots table; it issues no writes.
    """
    adapter = state_sync.engine_adapter
    schema = state_sync.schema
    table_name = f"{schema}._snapshots" if schema else "_snapshots"

    warning = (
        "SQLMesh detected that it may not be able to fully migrate the state database. This should not impact "
        "the migration process, but may result in unexpected changes being reported by the next `sqlmesh plan` "
        "command. Please run `sqlmesh diff prod` after the migration has completed, before making any new "
        "changes. If any unexpected changes are reported, consider running a forward-only plan to apply these "
        "changes and avoid unnecessary backfills: sqlmesh plan prod --forward-only. "
        "See https://sqlmesh.readthedocs.io/en/stable/concepts/plans/#forward-only-plans for more details.\n"
    )

    query = exp.select("snapshot").from_(table_name)
    rows = adapter.fetchall(query, quote_identifiers=True)

    for (raw_snapshot,) in rows:
        node = json.loads(raw_snapshot)["node"]

        # Standalone audits don't have a data hash, so they're unaffected
        if node.get("source_type") == "audit":
            continue

        env = node.get("python_env") or {}

        # Evaluate the three triggers lazily, in the same order as a chained `or`.
        affected = SQLMESH_BLUEPRINT_VARS in env
        if not affected:
            affected = any(executable.get("is_metadata") for executable in env.values())
        if not affected:
            affected = any(node.get(key) for key in METADATA_HASH_EXPRESSIONS)

        if affected:
            # One warning is enough; stop scanning after the first affected snapshot.
            get_console().log_warning(warning)
            return
0 commit comments