Add triggers class and selected snapshot triggers

treysp · treysp · commit 1b0ad0ec74ac · 2025-07-31T17:54:28.000-05:00
diff --git a/sqlmesh/core/console.py b/sqlmesh/core/console.py
@@ -37,7 +37,12 @@
     SnapshotId,
     SnapshotInfoLike,
 )
-from sqlmesh.core.snapshot.definition import Interval, Intervals, SnapshotTableInfo
+from sqlmesh.core.snapshot.definition import (
+    Interval,
+    Intervals,
+    SnapshotTableInfo,
+    SnapshotEvaluationTriggers,
+)
 from sqlmesh.core.test import ModelTest
 from sqlmesh.utils import rich as srich
 from sqlmesh.utils import Verbosity
@@ -428,7 +433,7 @@ def update_snapshot_evaluation_progress(
         num_audits_passed: int,
         num_audits_failed: int,
         audit_only: bool = False,
-        auto_restatement_triggers: t.Optional[t.List[SnapshotId]] = None,
+        snapshot_evaluation_triggers: t.Optional[SnapshotEvaluationTriggers] = None,
     ) -> None:
         """Updates the snapshot evaluation progress."""
 
@@ -576,7 +581,7 @@ def update_snapshot_evaluation_progress(
         num_audits_passed: int,
         num_audits_failed: int,
         audit_only: bool = False,
-        auto_restatement_triggers: t.Optional[t.List[SnapshotId]] = None,
+        snapshot_evaluation_triggers: t.Optional[SnapshotEvaluationTriggers] = None,
     ) -> None:
         pass
 
@@ -1058,7 +1063,7 @@ def update_snapshot_evaluation_progress(
         num_audits_passed: int,
         num_audits_failed: int,
         audit_only: bool = False,
-        auto_restatement_triggers: t.Optional[t.List[SnapshotId]] = None,
+        snapshot_evaluation_triggers: t.Optional[SnapshotEvaluationTriggers] = None,
     ) -> None:
         """Update the snapshot evaluation progress."""
         if (
@@ -3656,7 +3661,7 @@ def update_snapshot_evaluation_progress(
         num_audits_passed: int,
         num_audits_failed: int,
         audit_only: bool = False,
-        auto_restatement_triggers: t.Optional[t.List[SnapshotId]] = None,
+        snapshot_evaluation_triggers: t.Optional[SnapshotEvaluationTriggers] = None,
     ) -> None:
         view_name, loaded_batches = self.evaluation_batch_progress[snapshot.snapshot_id]
 
@@ -3826,12 +3831,15 @@ def update_snapshot_evaluation_progress(
         num_audits_passed: int,
         num_audits_failed: int,
         audit_only: bool = False,
-        auto_restatement_triggers: t.Optional[t.List[SnapshotId]] = None,
+        snapshot_evaluation_triggers: t.Optional[SnapshotEvaluationTriggers] = None,
     ) -> None:
         message = f"Evaluating {snapshot.name} | batch={batch_idx} | duration={duration_ms}ms | num_audits_passed={num_audits_passed} | num_audits_failed={num_audits_failed}"
 
-        if auto_restatement_triggers:
-            message += f" | auto_restatement_triggers={','.join(trigger.name for trigger in auto_restatement_triggers)}"
+        if snapshot_evaluation_triggers:
+            if snapshot_evaluation_triggers.auto_restatement_triggers:
+                message += f" | auto_restatement_triggers={','.join(trigger.name for trigger in snapshot_evaluation_triggers.auto_restatement_triggers)}"
+            if snapshot_evaluation_triggers.select_snapshot_triggers:
+                message += f" | select_snapshot_triggers={','.join(trigger.name for trigger in snapshot_evaluation_triggers.select_snapshot_triggers)}"
 
         if audit_only:
             message = f"Auditing {snapshot.name} duration={duration_ms}ms | num_audits_passed={num_audits_passed} | num_audits_failed={num_audits_failed}"
diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py
@@ -2296,11 +2296,9 @@ def check_intervals(
         }
 
         if select_models:
-            selected: t.Collection[str] = self._select_models_for_run(
-                select_models, True, snapshots.values()
-            )
+            selected, _ = self._select_models_for_run(select_models, True, snapshots.values())
         else:
-            selected = snapshots.keys()
+            selected = t.cast(t.Set[str], snapshots.keys())
 
         results = {}
         execution_context = self.execution_context(snapshots=snapshots)
@@ -2450,8 +2448,9 @@ def _run(
         scheduler = self.scheduler(environment=environment)
         snapshots = scheduler.snapshots
 
+        select_models_auto_upstream = None
         if select_models is not None:
-            select_models = self._select_models_for_run(
+            select_models, select_models_auto_upstream = self._select_models_for_run(
                 select_models, no_auto_upstream, snapshots.values()
             )
 
@@ -2463,6 +2462,7 @@ def _run(
             ignore_cron=ignore_cron,
             circuit_breaker=circuit_breaker,
             selected_snapshots=select_models,
+            selected_snapshots_auto_upstream=select_models_auto_upstream,
             auto_restatement_enabled=environment.lower() == c.PROD,
             run_environment_statements=True,
         )
@@ -2878,7 +2878,7 @@ def _select_models_for_run(
         select_models: t.Collection[str],
         no_auto_upstream: bool,
         snapshots: t.Collection[Snapshot],
-    ) -> t.Set[str]:
+    ) -> t.Tuple[t.Set[str], t.Set[str]]:
         models: UniqueKeyDict[str, Model] = UniqueKeyDict(
             "models", **{s.name: s.model for s in snapshots if s.is_model}
         )
@@ -2888,8 +2888,12 @@ def _select_models_for_run(
         model_selector = self._new_selector(models=models, dag=dag)
         result = set(model_selector.expand_model_selections(select_models))
         if not no_auto_upstream:
-            result = set(dag.subdag(*result))
-        return result
+            # Return the full selection (explicit + auto-added upstream) first so callers
+            # still run upstream models, and the auto-added subset second.
+            result_with_upstream = set(dag.subdag(*result))
+            return result_with_upstream, result_with_upstream - result
+        # No upstream expansion requested, so nothing was auto-added.
+        return result, set()
 
     @cached_property
     def _project_type(self) -> str:
diff --git a/sqlmesh/core/scheduler.py b/sqlmesh/core/scheduler.py
@@ -29,6 +29,7 @@
 from sqlmesh.core.snapshot.definition import check_ready_intervals
 from sqlmesh.core.snapshot.definition import (
     Interval,
+    SnapshotEvaluationTriggers,
     expand_range,
     parent_snapshots_by_name,
 )
@@ -223,6 +224,7 @@ def run(
         ignore_cron: bool = False,
         end_bounded: bool = False,
         selected_snapshots: t.Optional[t.Set[str]] = None,
+        selected_snapshots_auto_upstream: t.Optional[t.Set[str]] = None,
         circuit_breaker: t.Optional[t.Callable[[], bool]] = None,
         deployability_index: t.Optional[DeployabilityIndex] = None,
         auto_restatement_enabled: bool = False,
@@ -239,6 +241,7 @@ def run(
             ignore_cron=ignore_cron,
             end_bounded=end_bounded,
             selected_snapshots=selected_snapshots,
+            selected_snapshots_auto_upstream=selected_snapshots_auto_upstream,
             circuit_breaker=circuit_breaker,
             deployability_index=deployability_index,
             auto_restatement_enabled=auto_restatement_enabled,
@@ -374,7 +377,7 @@ def run_merged_intervals(
         run_environment_statements: bool = False,
         audit_only: bool = False,
         restatements: t.Optional[t.Dict[SnapshotId, Interval]] = None,
-        auto_restatement_triggers: t.Dict[SnapshotId, t.List[SnapshotId]] = {},
+        snapshot_evaluation_triggers: t.Dict[SnapshotId, SnapshotEvaluationTriggers] = {},
     ) -> t.Tuple[t.List[NodeExecutionFailedError[SchedulingUnit]], t.List[SchedulingUnit]]:
         """Runs precomputed batches of missing intervals.
 
@@ -477,7 +480,9 @@ def evaluate_node(node: SchedulingUnit) -> None:
                     evaluation_duration_ms,
                     num_audits - num_audits_failed,
                     num_audits_failed,
-                    auto_restatement_triggers=auto_restatement_triggers.get(snapshot.snapshot_id),
+                    snapshot_evaluation_triggers=snapshot_evaluation_triggers.get(
+                        snapshot.snapshot_id
+                    ),
                 )
 
         try:
@@ -588,6 +593,7 @@ def _run_or_audit(
         ignore_cron: bool = False,
         end_bounded: bool = False,
         selected_snapshots: t.Optional[t.Set[str]] = None,
+        selected_snapshots_auto_upstream: t.Optional[t.Set[str]] = None,
         circuit_breaker: t.Optional[t.Callable[[], bool]] = None,
         deployability_index: t.Optional[DeployabilityIndex] = None,
         auto_restatement_enabled: bool = False,
@@ -611,6 +617,7 @@ def _run_or_audit(
             end_bounded: If set to true, the evaluated intervals will be bounded by the target end date, disregarding lookback,
                 allow_partials, and other attributes that could cause the intervals to exceed the target end date.
             selected_snapshots: A set of snapshot names to run. If not provided, all snapshots will be run.
+            selected_snapshots_auto_upstream: The subset of selected_snapshots that was added automatically because it is upstream of an explicitly selected snapshot.
             circuit_breaker: An optional handler which checks if the run should be aborted.
             deployability_index: Determines snapshots that are deployable in the context of this render.
             auto_restatement_enabled: Whether to enable auto restatements.
@@ -666,6 +673,42 @@ def _run_or_audit(
         if not merged_intervals:
             return CompletionStatus.NOTHING_TO_DO
 
+        merged_intervals_snapshots = {
+            snapshot.snapshot_id: snapshot for snapshot in merged_intervals.keys()
+        }
+        select_snapshot_triggers: t.Dict[SnapshotId, t.List[SnapshotId]] = {}
+        if selected_snapshots and selected_snapshots_auto_upstream:
+            # actually selected snapshots are their own triggers
+            selected_snapshots_no_auto_upstream = (
+                selected_snapshots - selected_snapshots_auto_upstream
+            )
+            select_snapshot_triggers = {
+                s_id: [s_id]
+                for s_id in [
+                    snapshot_id
+                    for snapshot_id in merged_intervals_snapshots
+                    if snapshot_id.name in selected_snapshots_no_auto_upstream
+                ]
+            }
+
+            # trace upstream by reversing dag of all snapshots to evaluate
+            reversed_intervals_dag = snapshots_to_dag(merged_intervals_snapshots.values()).reversed
+            for s_id in reversed_intervals_dag:
+                if s_id not in select_snapshot_triggers:
+                    triggers = []
+                    for parent_s_id in merged_intervals_snapshots[s_id].parents:
+                        triggers.extend(select_snapshot_triggers[parent_s_id])
+                    select_snapshot_triggers[s_id] = list(dict.fromkeys(triggers))
+
+        all_snapshot_triggers: t.Dict[SnapshotId, SnapshotEvaluationTriggers] = {
+            s_id: SnapshotEvaluationTriggers(
+                ignore_cron=ignore_cron,
+                auto_restatement_triggers=auto_restatement_triggers.get(s_id, []),
+                select_snapshot_triggers=select_snapshot_triggers.get(s_id, []),
+            )
+            for s_id in merged_intervals_snapshots
+            if ignore_cron or s_id in auto_restatement_triggers or s_id in select_snapshot_triggers
+        }
         errors, _ = self.run_merged_intervals(
             merged_intervals=merged_intervals,
             deployability_index=deployability_index,
@@ -677,7 +720,7 @@ def _run_or_audit(
             run_environment_statements=run_environment_statements,
             audit_only=audit_only,
             restatements=remove_intervals,
-            auto_restatement_triggers=auto_restatement_triggers,
+            snapshot_evaluation_triggers=all_snapshot_triggers,
         )
 
         return CompletionStatus.FAILURE if errors else CompletionStatus.SUCCESS
diff --git a/sqlmesh/core/snapshot/definition.py b/sqlmesh/core/snapshot/definition.py
@@ -325,6 +325,14 @@ def table_name_for_environment(
         return table
 
 
+class SnapshotEvaluationTriggers(PydanticModel):
+    ignore_cron: bool
+    auto_restatement_triggers: t.List[SnapshotId] = []
+    select_snapshot_triggers: t.List[SnapshotId] = []
+    directly_modified_triggers: t.List[SnapshotId] = []
+    manual_restatement_triggers: t.List[SnapshotId] = []
+
+
 class SnapshotInfoMixin(ModelKindMixin):
     name: str
     dev_version_: t.Optional[str]
diff --git a/web/server/console.py b/web/server/console.py
@@ -9,7 +9,8 @@
 from sqlmesh.core.console import TerminalConsole
 from sqlmesh.core.environment import EnvironmentNamingInfo
 from sqlmesh.core.plan.definition import EvaluatablePlan
-from sqlmesh.core.snapshot import Snapshot, SnapshotInfoLike, SnapshotTableInfo, SnapshotId
+from sqlmesh.core.snapshot import Snapshot, SnapshotInfoLike, SnapshotTableInfo
+from sqlmesh.core.snapshot.definition import SnapshotEvaluationTriggers
 from sqlmesh.core.test import ModelTest
 from sqlmesh.core.test.result import ModelTextTestResult
 from sqlmesh.utils.date import now_timestamp
@@ -142,7 +143,7 @@ def update_snapshot_evaluation_progress(
         num_audits_passed: int,
         num_audits_failed: int,
         audit_only: bool = False,
-        auto_restatement_triggers: t.Optional[t.List[SnapshotId]] = None,
+        snapshot_evaluation_triggers: t.Optional[SnapshotEvaluationTriggers] = None,
     ) -> None:
         if audit_only:
             return