Skip to content

Commit 6eb3b9c

Browse files
authored
Thread missing streams in column/table view creation to char size calculation (#20351)
All of the APIs for creating pylibcudf Columns from arbitrary other owners and a libcudf `column_view` of type string will eventually call into `strings_column_view::chars_size`, which is stream-ordered and needs an input stream. This PR pipes the necessary streams through. Authors: - Vyas Ramasubramani (https://github.com/vyasr) - Matthew Murray (https://github.com/Matt711) Approvers: - Bradley Dice (https://github.com/bdice) - Matthew Roeschke (https://github.com/mroeschke) - Lawrence Mitchell (https://github.com/wence-) URL: #20351
1 parent 0693083 commit 6eb3b9c

File tree

6 files changed

+69
-23
lines changed

6 files changed

+69
-23
lines changed

python/pylibcudf/pylibcudf/column.pxd

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ cdef class OwnerWithCAI:
2626
cdef dict cai
2727

2828
@staticmethod
29-
cdef create(column_view cv, object owner)
29+
cdef create(column_view cv, object owner, Stream stream)
3030

3131

3232
cdef class OwnerMaskWithCAI:
@@ -68,7 +68,11 @@ cdef class Column:
6868
cdef Column from_column_view(const column_view& cv, Column owner)
6969

7070
@staticmethod
71-
cdef Column from_column_view_of_arbitrary(const column_view& cv, object owner)
71+
cdef Column from_column_view_of_arbitrary(
72+
const column_view& cv,
73+
object owner,
74+
Stream stream,
75+
)
7276

7377
@staticmethod
7478
cdef Column _wrap_nested_list_column(

python/pylibcudf/pylibcudf/column.pyx

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ cdef class _ArrowColumnHolder:
9393
cdef class OwnerWithCAI:
9494
"""An interface for column view's data with gpumemoryview via CAI."""
9595
@staticmethod
96-
cdef create(column_view cv, object owner):
96+
cdef create(column_view cv, object owner, Stream stream):
9797
obj = OwnerWithCAI()
9898
obj.owner = owner
9999
# The default size of 0 will be applied for any type that stores data in the
@@ -105,8 +105,7 @@ cdef class OwnerWithCAI:
105105
# Cast to Python integers before multiplying to avoid overflow.
106106
size = int(cv.size()) * int(cpp_size_of(cv.type()))
107107
elif cv.type().id() == type_id.STRING:
108-
# TODO: stream-ordered
109-
size = strings_column_view(cv).chars_size(_get_stream().view())
108+
size = strings_column_view(cv).chars_size(stream.view())
110109

111110
obj.cai = {
112111
"shape": (size,),
@@ -453,7 +452,11 @@ cdef class Column:
453452
)
454453
result.col.swap(c_result)
455454

456-
return Column.from_column_view_of_arbitrary(result.col.get().view(), result)
455+
return Column.from_column_view_of_arbitrary(
456+
result.col.get().view(),
457+
result,
458+
stream,
459+
)
457460
elif hasattr(obj, "__arrow_c_array__"):
458461
schema, h_array = obj.__arrow_c_array__()
459462
c_schema = <ArrowSchema*>PyCapsule_GetPointer(schema, "arrow_schema")
@@ -470,7 +473,11 @@ cdef class Column:
470473
)
471474
result.col.swap(c_result)
472475

473-
return Column.from_column_view_of_arbitrary(result.col.get().view(), result)
476+
return Column.from_column_view_of_arbitrary(
477+
result.col.get().view(),
478+
result,
479+
stream,
480+
)
474481
elif hasattr(obj, "__arrow_c_stream__"):
475482
arrow_stream = obj.__arrow_c_stream__()
476483
c_arrow_stream = (
@@ -490,7 +497,11 @@ cdef class Column:
490497
)
491498
result.col.swap(c_result)
492499

493-
return Column.from_column_view_of_arbitrary(result.col.get().view(), result)
500+
return Column.from_column_view_of_arbitrary(
501+
result.col.get().view(),
502+
result,
503+
stream,
504+
)
494505
elif hasattr(obj, "__arrow_c_device_stream__"):
495506
# TODO: When we add support for this case, it should be moved above
496507
# the __arrow_c_array__ case since we should prioritize device
@@ -716,7 +727,11 @@ cdef class Column:
716727
# from_column_view, but this does not work due to
717728
# https://github.com/cython/cython/issues/6740
718729
@staticmethod
719-
cdef Column from_column_view_of_arbitrary(const column_view& cv, object owner):
730+
cdef Column from_column_view_of_arbitrary(
731+
const column_view& cv,
732+
object owner,
733+
Stream stream,
734+
):
720735
"""Create a Column from a libcudf column_view into an arbitrary owner.
721736
722737
This method accepts shared ownership of the underlying data from the owner.
@@ -730,16 +745,19 @@ cdef class Column:
730745
"""
731746
# For efficiency, prohibit calling this overload with a Column owner.
732747
assert not isinstance(owner, Column)
748+
stream = _get_stream(stream)
733749

734750
children = []
735751
cdef size_type i
736752
if cv.num_children() != 0:
737753
for i in range(cv.num_children()):
738754
children.append(
739-
Column.from_column_view_of_arbitrary(cv.child(i), owner)
755+
Column.from_column_view_of_arbitrary(cv.child(i), owner, stream)
740756
)
741757

742-
cdef gpumemoryview owning_data = gpumemoryview(OwnerWithCAI.create(cv, owner))
758+
cdef gpumemoryview owning_data = gpumemoryview(
759+
OwnerWithCAI.create(cv, owner, stream)
760+
)
743761
cdef gpumemoryview owning_mask = None
744762
if cv.null_count() > 0:
745763
owning_mask = gpumemoryview(OwnerMaskWithCAI.create(cv, owner))

python/pylibcudf/pylibcudf/contiguous_split.pxd

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,10 @@ cdef class ChunkedPack:
5454

5555
cpdef PackedColumns pack(Table input)
5656

57-
cpdef Table unpack(PackedColumns input)
57+
cpdef Table unpack(PackedColumns input, Stream stream=*)
5858

5959
cpdef Table unpack_from_memoryviews(
6060
memoryview metadata,
6161
gpumemoryview gpu_data,
62+
Stream stream=*,
6263
)

python/pylibcudf/pylibcudf/contiguous_split.pyx

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ cpdef PackedColumns pack(Table input, Stream stream=None, DeviceMemoryResource m
350350
return PackedColumns.from_libcudf(move(pack), stream, mr)
351351

352352

353-
cpdef Table unpack(PackedColumns input):
353+
cpdef Table unpack(PackedColumns input, Stream stream=None):
354354
"""Deserialize the result of `pack`.
355355
356356
Copies the result of a serialized table into a table.
@@ -361,21 +361,25 @@ cpdef Table unpack(PackedColumns input):
361361
----------
362362
input : PackedColumns
363363
The packed columns to unpack.
364+
stream : Stream | None
365+
CUDA stream on which to perform the operation.
364366
365367
Returns
366368
-------
367369
Table
368370
Copy of the packed columns.
369371
"""
370372
cdef table_view v
373+
stream = _get_stream(stream)
371374
with nogil:
372375
v = cpp_unpack(dereference(input.c_obj))
373-
return Table.from_table_view_of_arbitrary(v, input)
376+
return Table.from_table_view_of_arbitrary(v, input, stream)
374377

375378

376379
cpdef Table unpack_from_memoryviews(
377380
memoryview metadata,
378381
gpumemoryview gpu_data,
382+
Stream stream=None,
379383
):
380384
"""Deserialize the result of `pack`.
381385
@@ -389,20 +393,23 @@ cpdef Table unpack_from_memoryviews(
389393
The packed metadata to unpack.
390394
gpu_data : gpumemoryview
391395
The packed gpu_data to unpack.
396+
stream : Stream | None
397+
CUDA stream on which to perform the operation.
392398
393399
Returns
394400
-------
395401
Table
396402
Copy of the packed columns.
397403
"""
404+
stream = _get_stream(stream)
398405
if metadata.nbytes == 0:
399406
if gpu_data.__cuda_array_interface__["data"][0] != 0:
400407
raise ValueError("Expected an empty gpu_data from unpacking an empty table")
401-
# For an empty table we just attach the default stream and mr since neither will
402-
# be used for any operations.
408+
# For an empty table we just attach the default mr since it will not be
409+
# used for any operations.
403410
return Table.from_libcudf(
404411
make_unique[table](table_view()),
405-
_get_stream(),
412+
stream,
406413
_get_memory_resource(),
407414
)
408415

@@ -414,4 +421,4 @@ cpdef Table unpack_from_memoryviews(
414421
cdef table_view v
415422
with nogil:
416423
v = cpp_unpack(metadata_ptr, gpu_data_ptr)
417-
return Table.from_table_view_of_arbitrary(v, gpu_data)
424+
return Table.from_table_view_of_arbitrary(v, gpu_data, stream)

python/pylibcudf/pylibcudf/table.pxd

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ cdef class Table:
2828
cdef Table from_table_view(const table_view& tv, Table owner)
2929

3030
@staticmethod
31-
cdef Table from_table_view_of_arbitrary(const table_view& tv, object owner)
31+
cdef Table from_table_view_of_arbitrary(
32+
const table_view& tv,
33+
object owner,
34+
Stream stream,
35+
)
3236

3337
cpdef list columns(self)

python/pylibcudf/pylibcudf/table.pyx

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,11 @@ cdef class Table:
175175
)
176176
result.tbl.swap(c_result)
177177

178-
return Table.from_table_view_of_arbitrary(result.tbl.get().view(), result)
178+
return Table.from_table_view_of_arbitrary(
179+
result.tbl.get().view(),
180+
result,
181+
stream,
182+
)
179183
elif hasattr(obj, "__arrow_c_stream__"):
180184
arrow_stream = obj.__arrow_c_stream__()
181185
c_stream = (
@@ -194,7 +198,11 @@ cdef class Table:
194198
)
195199
result.tbl.swap(c_result)
196200

197-
return Table.from_table_view_of_arbitrary(result.tbl.get().view(), result)
201+
return Table.from_table_view_of_arbitrary(
202+
result.tbl.get().view(),
203+
result,
204+
stream,
205+
)
198206
elif hasattr(obj, "__arrow_c_device_stream__"):
199207
# TODO: When we add support for this case, it should be moved above
200208
# the __arrow_c_stream__ case since we should prioritize device
@@ -262,7 +270,11 @@ cdef class Table:
262270
# from_table_view, but this does not work due to
263271
# https://github.com/cython/cython/issues/6740
264272
@staticmethod
265-
cdef Table from_table_view_of_arbitrary(const table_view& tv, object owner):
273+
cdef Table from_table_view_of_arbitrary(
274+
const table_view& tv,
275+
object owner,
276+
Stream stream,
277+
):
266278
"""Create a Table from a libcudf table_view into an arbitrary owner.
267279
268280
This method accepts shared ownership of the underlying data from the owner.
@@ -279,7 +291,7 @@ cdef class Table:
279291
assert not isinstance(owner, Table)
280292
cdef int i
281293
return Table([
282-
Column.from_column_view_of_arbitrary(tv.column(i), owner)
294+
Column.from_column_view_of_arbitrary(tv.column(i), owner, stream)
283295
for i in range(tv.num_columns())
284296
])
285297

0 commit comments

Comments
 (0)