Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,15 @@ def row_by_value(df, col, value):
"""Slice out row from DataFrame by a value."""
return df[df[col] == value].squeeze()

# register_series_and_dataframe_method can optionally include arguments to pass to pd.DataFrame.apply()
#
# Note that this is not the best example as total_diff can be vectorized instead of using
# pd.DataFrame.apply() with register_series_and_dataframe_method
@pf.register_series_and_dataframe_method(axis=1)
def total_diff(col, start_idx=0, end_idx=-1):
    """Return the value at ``end_idx`` minus the value at ``start_idx``.

    ``col`` is a single row or column; with the default indices this is the
    last value minus the first value.
    """
    # Positional lookups, evaluated in the same order as the subtraction
    # operands (end first, then start).
    end_value = col.iloc[end_idx]
    start_value = col.iloc[start_idx]
    return end_value - start_value
Copy link
Author

@seanboothm seanboothm Mar 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this is really not a great example of the need for register_series_or_dataframe_method, as total_diff could easily be vectorized. I do believe this function is beneficial in situations where the operation cannot be vectorized and must be completed column by column.

I am open to suggestions for a better example and will update the README once a better example is found.


```

```python
Expand All @@ -120,14 +129,30 @@ df.row_by_value('x', 10)
# x 10
# y 0
# Name: 0, dtype: int64

df2.total_diff()

# 0 -10
# 1 -18
# 2 -20
# dtype: int64

# It is possible to override the decorator kwargs from the function call
df.total_diff(axis=0, end_idx=-2)

# x 10
# y 2
# dtype: int64

```

## Available Methods

- **register_dataframe_method**: register a method directly with a pandas DataFrame.
- **register_dataframe_accessor**: register an accessor (and its methods) with a pandas DataFrame.
- **register_series_method**: register a methods directly with a pandas Series.
- **register_series_method**: register a method directly with a pandas Series.
- **register_series_accessor**: register an accessor (and its methods) with a pandas Series.
- **register_series_and_dataframe_method**: register a method directly with both pandas Series and DataFrames. Please note that on DataFrames this uses `pd.DataFrame.apply`, which can result in a loss of performance. Vectorized operations should always be preferred.

## Installation

Expand Down
3 changes: 2 additions & 1 deletion pandas_flavor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .register import (register_series_method,
register_series_accessor,
register_dataframe_method,
register_dataframe_accessor)
register_dataframe_accessor,
register_series_and_dataframe_method)
from .xarray import (register_xarray_dataarray_method,
register_xarray_dataset_method)

56 changes: 55 additions & 1 deletion pandas_flavor/register.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from functools import wraps
from .pandas_internals import (register_series_accessor,
register_dataframe_accessor)
from pandas.core.frame import DataFrame


def register_dataframe_method(method):
Expand Down Expand Up @@ -39,7 +40,6 @@ def register_series_method(method):
"""Register a function as a method attached to the Pandas Series.
"""
def inner(*args, **kwargs):

class AccessorMethod(object):
__doc__ = method.__doc__

Expand All @@ -55,3 +55,57 @@ def __call__(self, *args, **kwargs):
return method

return inner()


def register_series_and_dataframe_method(_func=None, **decorator_kwargs):
    """Register a function as a method on both pandas Series and DataFrame.

    The decorated function should be written to operate on a single
    column or row. Called on a Series, it receives that Series directly.
    Called on a DataFrame, it is dispatched through ``pd.DataFrame.apply``.

    The decorator can be used bare (``@register_series_and_dataframe_method``)
    or with keyword arguments
    (``@register_series_and_dataframe_method(axis=1)``).

    Please note that if the operation can be vectorized,
    ``register_dataframe_method`` will likely yield higher performance, as
    this decorator always uses ``pd.DataFrame.apply`` on DataFrames.

    Parameters
    ----------
    _func : callable, optional
        The function to register. Supplied implicitly when the decorator is
        used without parentheses; leave as ``None`` otherwise.
    **decorator_kwargs
        Default keyword arguments passed to ``pd.DataFrame.apply`` (such as
        ``axis=1``) whenever the method is called on a DataFrame. Keyword
        arguments given at call time take precedence over these. They are
        not used when the method is called on a Series.

    Returns
    -------
    callable
        The original, unmodified function when used bare; otherwise a
        decorator that registers its argument and returns it unmodified.

    Example
    -------

    .. code-block:: python

        @register_series_and_dataframe_method(axis=1)
        def total_pct_change(col):
            return (col.iloc[-1] - col.iloc[0]) / col.iloc[0]
    """
    def inner_wrapper(method):
        class SerAccessorMethod(object):
            __doc__ = method.__doc__

            def __init__(self, pandas_obj):
                self._obj = pandas_obj

            @wraps(method)
            def __call__(self, *args, **kwargs):
                # A Series is handed to the function as-is.
                return method(self._obj, *args, **kwargs)

        class DFAccessorMethod(object):
            __doc__ = method.__doc__

            def __init__(self, pandas_obj):
                self._obj = pandas_obj

            @wraps(method)
            def __call__(self, *args, **kwargs):
                # Call-time kwargs come last so they override the defaults
                # captured by the decorator.
                apply_kwargs = {**decorator_kwargs, **kwargs}
                return DataFrame.apply(
                    self._obj, method, args=args, **apply_kwargs
                )

        # Expose the function under its own name on both pandas types.
        register_series_accessor(method.__name__)(SerAccessorMethod)
        register_dataframe_accessor(method.__name__)(DFAccessorMethod)

        # Hand back the plain function so it stays usable on its own.
        return method

    if _func is None:
        # Used as ``@decorator(...)``: return the real decorator.
        return inner_wrapper
    # Used as a bare ``@decorator``: register immediately.
    return inner_wrapper(_func)