Implement unique numbers for pipeline runs

schustmi · schustmi · commit b0c1aa3ce121 · 2025-12-03T18:33:38.000+08:00
diff --git a/.github/workflows/require-release-label.yml b/.github/workflows/require-release-label.yml
@@ -1,12 +1,11 @@
+---
 # Requires PRs to have either 'release-notes' or 'no-release-notes' label
 # This ensures release notes are considered for every PR before merging.
 # The check is enforced via branch protection rules on develop.
 name: Require Release Label
-
 on:
   pull_request:
     types: [opened, labeled, unlabeled, synchronize]
-
 jobs:
   check-label:
     if: github.repository == 'zenml-io/zenml'
@@ -17,8 +16,8 @@ jobs:
         with:
           mode: exactly
           count: 1
-          labels: "release-notes, no-release-notes"
-          message: |
+          labels: release-notes, no-release-notes
+          message: |-
             This PR is missing a release label. Please add one of:
             - `release-notes` - if this PR has user-facing changes that should appear in the changelog
             - `no-release-notes` - if this is an internal change (refactoring, tests, CI, etc.)
diff --git a/src/zenml/cli/pipeline.py b/src/zenml/cli/pipeline.py
@@ -740,7 +740,15 @@ def runs() -> None:
 @runs.command("list", help="List all registered pipeline runs.")
 @list_options(
     PipelineRunFilter,
-    default_columns=["id", "run_name", "pipeline", "status", "stack", "owner"],
+    default_columns=[
+        "id",
+        "index",
+        "run_name",
+        "pipeline",
+        "status",
+        "stack",
+        "owner",
+    ],
 )
 def list_pipeline_runs(
     columns: str, output_format: OutputFormat, **kwargs: Any
diff --git a/src/zenml/client.py b/src/zenml/client.py
@@ -4741,6 +4741,7 @@ def list_pipeline_runs(
         linked_to_model_version_id: Optional[Union[str, UUID]] = None,
         orchestrator_run_id: Optional[str] = None,
         status: Optional[str] = None,
+        index: Optional[int] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
         unlisted: Optional[bool] = None,
@@ -4790,6 +4791,7 @@ def list_pipeline_runs(
             orchestrator_run_id: The run id of the orchestrator to filter by.
             name: The name of the run to filter by.
             status: The status of the pipeline run
+            index: The index of the pipeline run
             start_time: The start_time for the pipeline run
             end_time: The end_time for the pipeline run
             unlisted: If the runs should be unlisted or not.
@@ -4839,6 +4841,7 @@ def list_pipeline_runs(
             orchestrator_run_id=orchestrator_run_id,
             stack_id=stack_id,
             status=status,
+            index=index,
             start_time=start_time,
             end_time=end_time,
             tag=tag,
diff --git a/src/zenml/models/v2/core/pipeline_run.py b/src/zenml/models/v2/core/pipeline_run.py
@@ -101,9 +101,8 @@ class PipelineRunRequest(ProjectScopedRequest):
     snapshot: UUID = Field(
         title="The snapshot associated with the pipeline run."
     )
-    pipeline: Optional[UUID] = Field(
+    pipeline: UUID = Field(
         title="The pipeline associated with the pipeline run.",
-        default=None,
     )
     orchestrator_run_id: Optional[str] = Field(
         title="The orchestrator run ID.",
@@ -214,6 +213,9 @@ class PipelineRunResponseBody(ProjectScopedResponseBody):
         default=None,
         title="The reason for the status of the pipeline run.",
     )
+    index: int = Field(
+        title="The unique index of the run within the pipeline."
+    )
 
     model_config = ConfigDict(protected_namespaces=())
 
@@ -391,6 +393,15 @@ def status(self) -> ExecutionStatus:
         """
         return self.get_body().status
 
+    @property
+    def index(self) -> int:
+        """The `index` property.
+
+        Returns:
+            The `index` value stored on the response body.
+        """
+        body = self.get_body()
+        return body.index
+
     @property
     def run_metadata(self) -> Dict[str, MetadataType]:
         """The `run_metadata` property.
@@ -672,6 +683,10 @@ class PipelineRunFilter(
         default=None,
         description="Name of the Pipeline Run",
     )
+    index: Optional[int] = Field(
+        default=None,
+        description="The unique index of the run within the pipeline.",
+    )
     orchestrator_run_id: Optional[str] = Field(
         default=None,
         description="Name of the Pipeline Run within the orchestrator",
diff --git a/src/zenml/orchestrators/step_launcher.py b/src/zenml/orchestrators/step_launcher.py
@@ -406,9 +406,7 @@ def _create_or_reuse_run(self) -> Tuple[PipelineRunResponse, bool]:
             orchestrator_run_id=self._orchestrator_run_id,
             project=client.active_project.id,
             snapshot=self._snapshot.id,
-            pipeline=(
-                self._snapshot.pipeline.id if self._snapshot.pipeline else None
-            ),
+            pipeline=self._snapshot.pipeline.id,
             status=ExecutionStatus.RUNNING,
             orchestrator_environment=get_run_environment_dict(),
             start_time=start_time,
diff --git a/src/zenml/pipelines/run_utils.py b/src/zenml/pipelines/run_utils.py
@@ -108,7 +108,7 @@ def create_placeholder_run(
         orchestrator_run_id=orchestrator_run_id,
         project=snapshot.project_id,
         snapshot=snapshot.id,
-        pipeline=snapshot.pipeline.id if snapshot.pipeline else None,
+        pipeline=snapshot.pipeline.id,
         status=ExecutionStatus.INITIALIZING,
         tags=snapshot.pipeline_configuration.tags,
         logs=logs,
diff --git a/src/zenml/zen_stores/migrations/versions/6e4eb89f632d_unique_run_index.py b/src/zenml/zen_stores/migrations/versions/6e4eb89f632d_unique_run_index.py
@@ -0,0 +1,104 @@
+"""Unique run index [6e4eb89f632d].
+
+Revision ID: 6e4eb89f632d
+Revises: 0.92.0
+Create Date: 2025-12-03 17:27:32.828004
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "6e4eb89f632d"
+down_revision = "0.92.0"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Upgrade database schema and/or data, creating a new revision.
+
+    Adds a `run_count` column to the `pipeline` table and an `index` column
+    to the `pipeline_run` table, backfills both from the existing runs and
+    finally makes both columns non-nullable.
+    """
+    # Step 1: Add the new columns as nullable so that existing rows remain
+    # valid until they have been backfilled.
+    with op.batch_alter_table("pipeline", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column("run_count", sa.Integer(), nullable=True)
+        )
+
+    with op.batch_alter_table("pipeline_run", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("index", sa.Integer(), nullable=True))
+
+    # Step 2: Backfill the new columns from the existing data.
+    connection = op.get_bind()
+    meta = sa.MetaData()
+    meta.reflect(bind=connection, only=("pipeline_run", "pipeline"))
+    run_table = sa.Table("pipeline_run", meta)
+    pipeline_table = sa.Table("pipeline", meta)
+
+    # Every pipeline starts at zero. Pipelines without any runs are never
+    # touched by the per-pipeline update below and would otherwise keep a
+    # NULL `run_count`, which breaks the NOT NULL alteration in step 3.
+    connection.execute(sa.update(pipeline_table).values(run_count=0))
+
+    # These runs shouldn't exist, but just in case
+    connection.execute(
+        sa.update(run_table)
+        .where(run_table.c.pipeline_id.is_(None))
+        .values(index=0)
+    )
+
+    result = connection.execute(
+        sa.select(
+            run_table.c.id,
+            run_table.c.pipeline_id,
+            run_table.c.created,
+        )
+        .where(run_table.c.pipeline_id.is_not(None))
+        .order_by(run_table.c.pipeline_id, run_table.c.created, run_table.c.id)
+    ).fetchall()
+
+    # Rows arrive ordered by creation time within each pipeline, so a
+    # running per-pipeline counter yields indices 1, 2, 3, ... and ends up
+    # holding the total number of runs of each pipeline.
+    run_updates: list[dict] = []
+    run_counts: dict = {}
+    for row in result:
+        index_within_pipeline = run_counts.get(row.pipeline_id, 0) + 1
+        run_counts[row.pipeline_id] = index_within_pipeline
+        run_updates.append({"b_id": row.id, "b_index": index_within_pipeline})
+
+    # The bind parameter names deliberately differ from the column names:
+    # in an UPDATE, parameter keys that match a column key are reserved by
+    # SQLAlchemy for the SET clause and cannot be reused in the WHERE clause.
+    if run_updates:
+        connection.execute(
+            sa.update(run_table)
+            .where(run_table.c.id == sa.bindparam("b_id"))
+            .values(index=sa.bindparam("b_index")),
+            run_updates,
+        )
+
+    if run_counts:
+        pipeline_updates = [
+            {"b_id": pipeline_id, "b_run_count": run_count}
+            for pipeline_id, run_count in run_counts.items()
+        ]
+        connection.execute(
+            sa.update(pipeline_table)
+            .where(pipeline_table.c.id == sa.bindparam("b_id"))
+            .values(run_count=sa.bindparam("b_run_count")),
+            pipeline_updates,
+        )
+
+    # Step 3: Make columns non-nullable
+    with op.batch_alter_table("pipeline_run", schema=None) as batch_op:
+        batch_op.alter_column(
+            "index", existing_type=sa.Integer(), nullable=False
+        )
+
+    with op.batch_alter_table("pipeline", schema=None) as batch_op:
+        batch_op.alter_column(
+            "run_count", existing_type=sa.Integer(), nullable=False
+        )
+
+
+def downgrade() -> None:
+    """Downgrade database schema and/or data back to the previous revision.
+
+    Drops the `index` column from the `pipeline_run` table and the
+    `run_count` column from the `pipeline` table, i.e. exactly the two
+    columns that `upgrade` adds.
+    """
+    with op.batch_alter_table("pipeline_run", schema=None) as batch_op:
+        # The column added by `upgrade` is called `index`.
+        batch_op.drop_column("index")
+
+    with op.batch_alter_table("pipeline", schema=None) as batch_op:
+        batch_op.drop_column("run_count")
diff --git a/src/zenml/zen_stores/schemas/pipeline_run_schemas.py b/src/zenml/zen_stores/schemas/pipeline_run_schemas.py
@@ -117,6 +117,7 @@ class PipelineRunSchema(NamedSchema, RunMetadataInterface, table=True):
     orchestrator_environment: Optional[str] = Field(
         sa_column=Column(TEXT, nullable=True)
     )
+    index: int = Field(nullable=False)
 
     # Foreign keys
     snapshot_id: Optional[UUID] = build_foreign_key_field(
@@ -343,12 +344,13 @@ def get_query_options(
 
     @classmethod
     def from_request(
-        cls, request: "PipelineRunRequest"
+        cls, request: "PipelineRunRequest", index: int
     ) -> "PipelineRunSchema":
         """Convert a `PipelineRunRequest` to a `PipelineRunSchema`.
 
         Args:
             request: The request to convert.
+            index: The index of the pipeline run.
 
         Returns:
             The created `PipelineRunSchema`.
@@ -379,6 +381,7 @@ def from_request(
             orchestrator_environment=orchestrator_environment,
             start_time=request.start_time,
             status=request.status.value,
+            index=index,
             in_progress=not request.status.is_finished,
             status_reason=request.status_reason,
             pipeline_id=request.pipeline,
@@ -547,6 +550,7 @@ def to_model(
             created=self.created,
             updated=self.updated,
             in_progress=self.in_progress,
+            index=self.index,
         )
         metadata = None
         if include_metadata:
diff --git a/src/zenml/zen_stores/schemas/pipeline_schemas.py b/src/zenml/zen_stores/schemas/pipeline_schemas.py
@@ -83,6 +83,7 @@ class PipelineSchema(NamedSchema, table=True):
         ondelete="SET NULL",
         nullable=True,
     )
+    run_count: int = Field(nullable=False)
 
     # Relationships
     user: Optional["UserSchema"] = Relationship(back_populates="pipelines")
@@ -198,6 +199,7 @@ def from_request(
             description=pipeline_request.description,
             project_id=pipeline_request.project,
             user_id=pipeline_request.user,
+            run_count=0,
         )
 
     def to_model(
diff --git a/src/zenml/zen_stores/sql_zen_store.py b/src/zenml/zen_stores/sql_zen_store.py
@@ -6504,6 +6504,32 @@ def _get_duplicate_run_name_error_message(
             f"For more information on run naming, see: https://docs.zenml.io/concepts/steps_and_pipelines/yaml_configuration#run-name"
         )
 
+    def _get_next_run_index(self, pipeline_id: UUID, session: Session) -> int:
+        """Get the next run index for a pipeline.
+
+        Reads the `run_count` stored on the pipeline row using a
+        `SELECT ... FOR UPDATE` query, increments it by one, writes the new
+        value back and returns it. The session is committed both before the
+        row is read and after the new value has been written.
+
+        Args:
+            pipeline_id: The ID of the pipeline to get the next run index for.
+            session: SQLAlchemy session.
+
+        Returns:
+            The next run index for the pipeline.
+        """
+        # Commit before acquiring the exclusive lock on the pipeline
+        session.commit()
+
+        locked_count_query = (
+            select(PipelineSchema.run_count)
+            .where(PipelineSchema.id == pipeline_id)
+            .with_for_update()
+        )
+        previous_count = session.exec(locked_count_query).one()
+        next_index = previous_count + 1
+
+        store_count_statement = (
+            update(PipelineSchema)
+            .where(col(PipelineSchema.id) == pipeline_id)
+            .values(run_count=next_index)
+        )
+        session.execute(store_count_statement)
+
+        # Persist the incremented counter before handing out the index.
+        session.commit()
+        return next_index
+
     def _create_run(
         self, pipeline_run: PipelineRunRequest, session: Session
     ) -> PipelineRunResponse:
@@ -6524,7 +6550,7 @@ def _create_run(
                 can not be created.
         """
         self._set_request_user_id(request_model=pipeline_run, session=session)
-        self._get_reference_schema_by_id(
+        snapshot = self._get_reference_schema_by_id(
             resource=pipeline_run,
             reference_schema=PipelineSnapshotSchema,
             reference_id=pipeline_run.snapshot,
@@ -6538,7 +6564,10 @@ def _create_run(
             session=session,
         )
 
-        new_run = PipelineRunSchema.from_request(pipeline_run)
+        index = self._get_next_run_index(
+            pipeline_id=snapshot.pipeline_id, session=session
+        )
+        new_run = PipelineRunSchema.from_request(pipeline_run, index=index)
 
         session.add(new_run)
 

Original file line number	Diff line number	Diff line change
`@@ -83,6 +83,7 @@ class PipelineSchema(NamedSchema, table=True):`
`83`	`83`	`ondelete="SET NULL",`
`84`	`84`	`nullable=True,`
`85`	`85`	`)`
	`86`	`+ run_count: int = Field(nullable=False)`
`86`	`87`
`87`	`88`	`# Relationships`
`88`	`89`	`user: Optional["UserSchema"] = Relationship(back_populates="pipelines")`
`@@ -198,6 +199,7 @@ def from_request(`
`198`	`199`	`description=pipeline_request.description,`
`199`	`200`	`project_id=pipeline_request.project,`
`200`	`201`	`user_id=pipeline_request.user,`
	`202`	`+ run_count=0,`
`201`	`203`	`)`
`202`	`204`
`203`	`205`	`def to_model(`