Skip to content

Commit 4c6268d

Browse files
authored
Check split_out to decide on sorted groupby (#802)
1 parent 1101477 commit 4c6268d

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

dask_sql/physical/rel/logical/aggregate.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -533,11 +533,14 @@ def _perform_aggregation(
533533
for group_name in group_columns
534534
]
535535

536+
# if split_out > 1, we cannot do a sorted groupby
537+
sort = False if groupby_agg_options.get("split_out", 1) > 1 else True
538+
536539
# perform groupby operation; if we are using custom aggregations, we must handle
537540
# null values manually (this is slow)
538541
if fast_groupby:
539542
grouped_df = tmp_df.groupby(
540-
by=(group_columns or [additional_column_name]), dropna=False
543+
by=(group_columns or [additional_column_name]), dropna=False, sort=sort
541544
)
542545
else:
543546
group_columns = [
@@ -547,7 +550,7 @@ def _perform_aggregation(
547550
group_columns_and_nulls = get_groupby_with_nulls_cols(
548551
tmp_df, group_columns, additional_column_name
549552
)
550-
grouped_df = tmp_df.groupby(by=group_columns_and_nulls)
553+
grouped_df = tmp_df.groupby(by=group_columns_and_nulls, sort=sort)
551554

552555
# apply the aggregation(s)
553556
logger.debug(f"Performing aggregation {dict(aggregations_dict)}")

0 commit comments

Comments
 (0)