diff --git a/README.md b/README.md index 3599bb1..1cad334 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,15 @@ def row_by_value(df, col, value): """Slice out row from DataFrame by a value.""" return df[df[col] == value].squeeze() +# register_series_and_dataframe_method can optionally include arguments to pass to pd.DataFrame.apply() +# +# Note that this is not the best example as total_diff can be vectorized instead of using +# pd.DataFrame.apply() with register_series_and_dataframe_method +@pf.register_series_and_dataframe_method(axis=1) +def total_diff(col, start_idx=0, end_idx=-1): + """ Return difference between the last and the first values per row/column""" + return col.iloc[end_idx] - col.iloc[start_idx] + ``` ```python @@ -120,14 +129,30 @@ df.row_by_value('x', 10) # x 10 # y 0 # Name: 0, dtype: int64 + +df2.total_diff() + +# 0 -10 +# 1 -18 +# 2 -20 +# dtype: int64 + +# It is possible to override the decorator kwargs from the function call +df.total_diff(axis=0, end_idx=-2) + +# x 10 +# y 2 +# dtype: int64 + ``` ## Available Methods - **register_dataframe_method**: register a method directly with a pandas DataFrame. - **register_dataframe_accessor**: register an accessor (and it's methods) with a pandas DataFrame. -- **register_series_method**: register a methods directly with a pandas Series. +- **register_series_method**: register a method directly with a pandas Series. - **register_series_accessor**: register an accessor (and it's methods) with a pandas Series. +- **register_series_and_dataframe_method**: register a method directly with both pandas Series and DataFrames. Please note that this will use pd.DataFrame.apply on dataframes which can result in a loss in performance. Vectorized operations should always be preferred. 
## Installation diff --git a/pandas_flavor/__init__.py b/pandas_flavor/__init__.py index 80755f9..c855523 100644 --- a/pandas_flavor/__init__.py +++ b/pandas_flavor/__init__.py @@ -1,7 +1,8 @@ from .register import (register_series_method, register_series_accessor, register_dataframe_method, - register_dataframe_accessor) + register_dataframe_accessor, + register_series_and_dataframe_method) from .xarray import (register_xarray_dataarray_method, register_xarray_dataset_method) diff --git a/pandas_flavor/register.py b/pandas_flavor/register.py index d8c20e7..ce1e17f 100644 --- a/pandas_flavor/register.py +++ b/pandas_flavor/register.py @@ -1,6 +1,7 @@ from functools import wraps from .pandas_internals import (register_series_accessor, register_dataframe_accessor) +from pandas.core.frame import DataFrame def register_dataframe_method(method): @@ -39,7 +40,6 @@ def register_series_method(method): """Register a function as a method attached to the Pandas Series. """ def inner(*args, **kwargs): - class AccessorMethod(object): __doc__ = method.__doc__ @@ -55,3 +55,57 @@ def __call__(self, *args, **kwargs): return method return inner() + + +def register_series_and_dataframe_method(_func=None, **decorator_kwargs): + """Register a function as a method attached to both the Pandas Series and DataFrame. + On a DataFrame the function is applied to each column or each row via pd.DataFrame.apply(). + + Decorator keyword arguments (e.g. axis=1) are passed to pd.DataFrame.apply(); call-time kwargs override them. + + Please note that if the operation can be vectorized, register_dataframe_method will + likely yield higher performance, as this decorator always uses pd.DataFrame.apply() on DataFrames. + + Example + ------- + + .. 
code-block:: python + + @register_series_and_dataframe_method + def total_pct_change(col): + return (col.iloc[-1] - col.iloc[0]) / col.iloc[0] + """ + def inner_wrapper(method): + def inner(*args, **kwargs): + class SerAccessorMethod(object): + __doc__ = method.__doc__ + + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @wraps(method) + def __call__(self, *args, **kwargs): + return method(self._obj, *args, **kwargs) + + class DFAccessorMethod(object): + __doc__ = method.__doc__ + + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @wraps(method) + def __call__(self, *args, **kwargs): + kwargs = {**decorator_kwargs, **kwargs} + return DataFrame.apply(self._obj, method, args=args, **kwargs) + + register_series_accessor(method.__name__)(SerAccessorMethod) + register_dataframe_accessor(method.__name__)(DFAccessorMethod) + + return method + + return inner() + + if _func is None: + return inner_wrapper + else: + return inner_wrapper(_func) \ No newline at end of file