Skip to content

Commit 81a6d69

Browse files
Refactor apply_method in GroupBy (#488)
* Renames class method in Daru::Core::GroupBy for clarity * Adds deprecation code for renamed method * Updates Daru::Core::GroupBy to use aggregate in apply_method This also fixes issues with Daru::Core::GroupBy#std: when a group had only one value, the std was not NaN (which was wrong)
1 parent 7e36d74 commit 81a6d69

File tree

1 file changed

+23
-22
lines changed

1 file changed

+23
-22
lines changed

lib/daru/core/group_by.rb

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,25 @@ module Daru
22
module Core
33
class GroupBy
44
class << self
5+
extend Gem::Deprecate
6+
57
# @private
6-
def get_positions_group_map_on(indexes_with_positions, sort: false)
7-
group_map = {}
8+
def group_by_index_to_positions(indexes_with_positions, sort: false)
9+
index_to_positions = {}
810

911
indexes_with_positions.each do |idx, position|
10-
(group_map[idx] ||= []) << position
12+
(index_to_positions[idx] ||= []) << position
1113
end
1214

1315
if sort # TODO: maybe add a more "stable" sorting option?
14-
sorted_keys = group_map.keys.sort(&Daru::Core::GroupBy::TUPLE_SORTER)
15-
group_map = sorted_keys.map { |k| [k, group_map[k]] }.to_h
16+
sorted_keys = index_to_positions.keys.sort(&Daru::Core::GroupBy::TUPLE_SORTER)
17+
index_to_positions = sorted_keys.map { |k| [k, index_to_positions[k]] }.to_h
1618
end
1719

18-
group_map
20+
index_to_positions
1921
end
22+
alias get_positions_group_map_on group_by_index_to_positions
23+
deprecate :get_positions_group_map_on, :group_by_index_to_positions, 2019, 10
2024

2125
# @private
2226
def get_positions_group_for_aggregation(multi_index, level=-1)
@@ -25,14 +29,14 @@ def get_positions_group_for_aggregation(multi_index, level=-1)
2529
new_index = multi_index.dup
2630
new_index.remove_layer(level) # TODO: recheck code of Daru::MultiIndex#remove_layer
2731

28-
get_positions_group_map_on(new_index.each_with_index)
32+
group_by_index_to_positions(new_index.each_with_index)
2933
end
3034

3135
# @private
3236
def get_positions_group_map_for_df(df, group_by_keys, sort: true)
3337
indexes_with_positions = df[*group_by_keys].to_df.each_row.map(&:to_a).each_with_index
3438

35-
get_positions_group_map_on(indexes_with_positions, sort: sort)
39+
group_by_index_to_positions(indexes_with_positions, sort: sort)
3640
end
3741

3842
# @private
@@ -57,6 +61,9 @@ def df_from_group_map(df, group_map, remaining_vectors, from_position: true)
5761
end
5862
end
5963

64+
# The group_by was done over the vectors in group_vectors; the remaining vectors are the non_group_vectors
65+
attr_reader :group_vectors, :non_group_vectors
66+
6067
# lazy accessor/attr_reader for the attribute groups
6168
def groups
6269
@groups ||= GroupBy.group_map_from_positions_to_indexes(@groups_by_pos, @context.index)
@@ -93,7 +100,7 @@ def initialize context, names
93100
@group_vectors = names
94101
@non_group_vectors = context.vectors.to_a - names
95102

96-
@context = context # TODO: maybe rename in @original_df or @grouped_db
103+
@context = context # TODO: maybe rename in @original_df
97104

98105
# FIXME: It feels like we don't want to sort here. Ruby's #group_by
99106
# never sorts:
@@ -362,21 +369,15 @@ def select_groups_from method, quantity
362369
Daru::DataFrame.rows(rows, order: @context.vectors, index: indexes)
363370
end
364371

365-
def apply_method method_type, method
366-
order = @non_group_vectors.select do |ngvec|
367-
method_type == :numeric && @context[ngvec].type == :numeric
368-
end
372+
def select_numeric_non_group_vectors
373+
@non_group_vectors.select { |ngvec| @context[ngvec].type == :numeric }
374+
end
369375

370-
rows = groups_by_idx.map do |_group, indexes|
371-
order.map do |ngvector|
372-
slice = @context[ngvector][*indexes]
373-
slice.is_a?(Daru::Vector) ? slice.send(method) : slice
374-
end
375-
end
376+
def apply_method method_type, method
377+
raise 'To implement' if method_type != :numeric
378+
aggregation_options = select_numeric_non_group_vectors.map { |k| [k, method] }.to_h
376379

377-
index = get_grouped_index
378-
order = Daru::Index.new(order)
379-
Daru::DataFrame.new(rows.transpose, index: index, order: order)
380+
aggregate(aggregation_options)
380381
end
381382

382383
def get_grouped_index(index_tuples=nil)

0 commit comments

Comments
 (0)