Skip to content

Commit adc8b23

Browse files
authored
[cudf-polars] CUDA stream (#20154)
`DataFrame` now has an associated CUDA stream, which must be used by `do_evaluate` et al. At the moment, everything will use the default stream so this should behave the same. Once the CUDA stream is explicitly used everywhere, a future PR will explicitly give different dataframes their own streams. Part of #20228. Closes #20241 Authors: - Mads R. B. Kristensen (https://github.com/madsbk) - Tom Augspurger (https://github.com/TomAugspurger) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) - Lawrence Mitchell (https://github.com/wence-) - Matthew Murray (https://github.com/Matt711) URL: #20154
1 parent cfdf7c4 commit adc8b23

File tree

13 files changed

+518
-143
lines changed

13 files changed

+518
-143
lines changed

python/cudf_polars/cudf_polars/containers/dataframe.py

Lines changed: 99 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,9 @@
2020

2121
from typing_extensions import Any, CapsuleType, Self
2222

23-
from cudf_polars.typing import ColumnOptions, DataFrameHeader, PolarsDataType, Slice
23+
from rmm.pylibrmm.stream import Stream
2424

25+
from cudf_polars.typing import ColumnOptions, DataFrameHeader, PolarsDataType, Slice
2526

2627
__all__: list[str] = ["DataFrame"]
2728

@@ -78,19 +79,21 @@ class DataFrame:
7879
column_map: dict[str, Column]
7980
table: plc.Table
8081
columns: list[NamedColumn]
82+
stream: Stream
8183

82-
def __init__(self, columns: Iterable[Column]) -> None:
84+
def __init__(self, columns: Iterable[Column], stream: Stream) -> None:
8385
columns = list(columns)
8486
if any(c.name is None for c in columns):
8587
raise ValueError("All columns must have a name")
8688
self.columns = [cast(NamedColumn, c) for c in columns]
8789
self.dtypes = [c.dtype for c in self.columns]
8890
self.column_map = {c.name: c for c in self.columns}
8991
self.table = plc.Table([c.obj for c in self.columns])
92+
self.stream = stream
9093

9194
def copy(self) -> Self:
9295
"""Return a shallow copy of self."""
93-
return type(self)(c.copy() for c in self.columns)
96+
return type(self)((c.copy() for c in self.columns), stream=self.stream)
9497

9598
def to_polars(self) -> pl.DataFrame:
9699
"""Convert to a polars DataFrame."""
@@ -135,30 +138,42 @@ def num_rows(self) -> int:
135138
return self.table.num_rows() if self.column_map else 0
136139

137140
@classmethod
138-
def from_polars(cls, df: pl.DataFrame) -> Self:
141+
def from_polars(cls, df: pl.DataFrame, stream: Stream) -> Self:
139142
"""
140143
Create from a polars dataframe.
141144
142145
Parameters
143146
----------
144147
df
145148
Polars dataframe to convert
149+
stream
150+
CUDA stream used for device memory operations and kernel launches
151+
on this dataframe.
146152
147153
Returns
148154
-------
149155
New dataframe representing the input.
150156
"""
151-
plc_table = plc.Table.from_arrow(df)
157+
plc_table = plc.Table.from_arrow(df, stream=stream)
152158
return cls(
153-
Column(d_col, name=name, dtype=DataType(h_col.dtype)).copy_metadata(h_col)
154-
for d_col, h_col, name in zip(
155-
plc_table.columns(), df.iter_columns(), df.columns, strict=True
156-
)
159+
(
160+
Column(d_col, name=name, dtype=DataType(h_col.dtype)).copy_metadata(
161+
h_col
162+
)
163+
for d_col, h_col, name in zip(
164+
plc_table.columns(), df.iter_columns(), df.columns, strict=True
165+
)
166+
),
167+
stream=stream,
157168
)
158169

159170
@classmethod
160171
def from_table(
161-
cls, table: plc.Table, names: Sequence[str], dtypes: Sequence[DataType]
172+
cls,
173+
table: plc.Table,
174+
names: Sequence[str],
175+
dtypes: Sequence[DataType],
176+
stream: Stream,
162177
) -> Self:
163178
"""
164179
Create from a pylibcudf table.
@@ -171,6 +186,10 @@ def from_table(
171186
Names for the columns
172187
dtypes
173188
Dtypes for the columns
189+
stream
190+
CUDA stream used for device memory operations and kernel launches
191+
on this dataframe. The caller is responsible for ensuring that
192+
the data in ``table`` is valid on ``stream``.
174193
175194
Returns
176195
-------
@@ -185,15 +204,19 @@ def from_table(
185204
if table.num_columns() != len(names):
186205
raise ValueError("Mismatching name and table length.")
187206
return cls(
188-
Column(c, name=name, dtype=dtype)
189-
for c, name, dtype in zip(table.columns(), names, dtypes, strict=True)
207+
(
208+
Column(c, name=name, dtype=dtype)
209+
for c, name, dtype in zip(table.columns(), names, dtypes, strict=True)
210+
),
211+
stream=stream,
190212
)
191213

192214
@classmethod
193215
def deserialize(
194216
cls,
195217
header: DataFrameHeader,
196218
frames: tuple[memoryview[bytes], plc.gpumemoryview],
219+
stream: Stream,
197220
) -> Self:
198221
"""
199222
Create a DataFrame from a serialized representation returned by `.serialize()`.
@@ -204,6 +227,10 @@ def deserialize(
204227
The (unpickled) metadata required to reconstruct the object.
205228
frames
206229
Two-tuple of frames (a memoryview and a gpumemoryview).
230+
stream
231+
CUDA stream used for device memory operations and kernel launches
232+
on this dataframe. The caller is responsible for ensuring that
233+
the data in ``frames`` is valid on ``stream``.
207234
208235
Returns
209236
-------
@@ -212,11 +239,15 @@ def deserialize(
212239
"""
213240
packed_metadata, packed_gpu_data = frames
214241
table = plc.contiguous_split.unpack_from_memoryviews(
215-
packed_metadata, packed_gpu_data
242+
packed_metadata,
243+
packed_gpu_data,
216244
)
217245
return cls(
218-
Column(c, **Column.deserialize_ctor_kwargs(kw))
219-
for c, kw in zip(table.columns(), header["columns_kwargs"], strict=True)
246+
(
247+
Column(c, **Column.deserialize_ctor_kwargs(kw))
248+
for c, kw in zip(table.columns(), header["columns_kwargs"], strict=True)
249+
),
250+
stream=stream,
220251
)
221252

222253
def serialize(
@@ -240,7 +271,7 @@ def serialize(
240271
frames
241272
Two-tuple of frames suitable for passing to `plc.contiguous_split.unpack_from_memoryviews`
242273
"""
243-
packed = plc.contiguous_split.pack(self.table)
274+
packed = plc.contiguous_split.pack(self.table, stream=self.stream)
244275

245276
# Keyword arguments for `Column.__init__`.
246277
columns_kwargs: list[ColumnOptions] = [
@@ -278,12 +309,19 @@ def sorted_like(
278309
raise ValueError("Can only copy from identically named frame")
279310
subset = self.column_names_set if subset is None else subset
280311
return type(self)(
281-
c.sorted_like(other) if c.name in subset else c
282-
for c, other in zip(self.columns, like.columns, strict=True)
312+
(
313+
c.sorted_like(other) if c.name in subset else c
314+
for c, other in zip(self.columns, like.columns, strict=True)
315+
),
316+
stream=self.stream,
283317
)
284318

285319
def with_columns(
286-
self, columns: Iterable[Column], *, replace_only: bool = False
320+
self,
321+
columns: Iterable[Column],
322+
*,
323+
replace_only: bool = False,
324+
stream: Stream,
287325
) -> Self:
288326
"""
289327
Return a new dataframe with extra columns.
@@ -294,6 +332,13 @@ def with_columns(
294332
Columns to add
295333
replace_only
296334
If true, then only replacements are allowed (matching by name).
335+
stream
336+
CUDA stream used for device memory operations and kernel launches.
337+
The caller is responsible for ensuring that
338+
339+
1. The data in ``columns`` is valid on ``stream``.
340+
2. No additional operations occur on ``self.stream`` with the
341+
original data in ``self``.
297342
298343
Returns
299344
-------
@@ -307,33 +352,57 @@ def with_columns(
307352
new = {c.name: c for c in columns}
308353
if replace_only and not self.column_names_set.issuperset(new.keys()):
309354
raise ValueError("Cannot replace with non-existing names")
310-
return type(self)((self.column_map | new).values())
355+
return type(self)((self.column_map | new).values(), stream=stream)
311356

312357
def discard_columns(self, names: Set[str]) -> Self:
313358
"""Drop columns by name."""
314-
return type(self)(column for column in self.columns if column.name not in names)
359+
return type(self)(
360+
(column for column in self.columns if column.name not in names),
361+
stream=self.stream,
362+
)
315363

316364
def select(self, names: Sequence[str] | Mapping[str, Any]) -> Self:
317365
"""Select columns by name returning DataFrame."""
318366
try:
319-
return type(self)(self.column_map[name] for name in names)
367+
return type(self)(
368+
(self.column_map[name] for name in names), stream=self.stream
369+
)
320370
except KeyError as e:
321371
raise ValueError("Can't select missing names") from e
322372

323373
def rename_columns(self, mapping: Mapping[str, str]) -> Self:
324374
"""Rename some columns."""
325-
return type(self)(c.rename(mapping.get(c.name, c.name)) for c in self.columns)
375+
return type(self)(
376+
(c.rename(mapping.get(c.name, c.name)) for c in self.columns),
377+
stream=self.stream,
378+
)
326379

327380
def select_columns(self, names: Set[str]) -> list[Column]:
328381
"""Select columns by name."""
329382
return [c for c in self.columns if c.name in names]
330383

331384
def filter(self, mask: Column) -> Self:
332-
"""Return a filtered table given a mask."""
333-
table = plc.stream_compaction.apply_boolean_mask(self.table, mask.obj)
385+
"""
386+
Return a filtered table given a mask.
387+
388+
Parameters
389+
----------
390+
mask
391+
Boolean mask to apply to the dataframe. It is the caller's
392+
responsibility to ensure that ``mask`` is valid on ``self.stream``.
393+
A mask that is derived from ``self`` via a computation on ``self.stream``
394+
automatically satisfies this requirement.
395+
396+
Returns
397+
-------
398+
Filtered dataframe
399+
"""
400+
table = plc.stream_compaction.apply_boolean_mask(
401+
self.table, mask.obj, stream=self.stream
402+
)
334403
return (
335404
type(self)
336-
.from_table(table, self.column_names, self.dtypes)
405+
.from_table(table, self.column_names, self.dtypes, self.stream)
337406
.sorted_like(self)
338407
)
339408

@@ -354,10 +423,12 @@ def slice(self, zlice: Slice | None) -> Self:
354423
if zlice is None:
355424
return self
356425
(table,) = plc.copying.slice(
357-
self.table, conversion.from_polars_slice(zlice, num_rows=self.num_rows)
426+
self.table,
427+
conversion.from_polars_slice(zlice, num_rows=self.num_rows),
428+
stream=self.stream,
358429
)
359430
return (
360431
type(self)
361-
.from_table(table, self.column_names, self.dtypes)
432+
.from_table(table, self.column_names, self.dtypes, self.stream)
362433
.sorted_like(self)
363434
)

python/cudf_polars/cudf_polars/dsl/expressions/rolling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -854,5 +854,5 @@ def do_evaluate( # noqa: D102
854854

855855
# Create a temporary DataFrame with the broadcasted columns named by their
856856
# placeholder names from agg decomposition, then evaluate the post-expression.
857-
df = DataFrame(broadcasted_cols)
857+
df = DataFrame(broadcasted_cols, stream=df.stream)
858858
return self.post.value.evaluate(df, context=ExecutionContext.FRAME)

0 commit comments

Comments (0)