diff --git a/.gitignore b/.gitignore index 471a0de..8ff8e70 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,7 @@ # macOS metadata .DS_Store + +# compiled files +runstats/_core.pxd +runstats/_core.py \ No newline at end of file diff --git a/README.rst b/README.rst index ed9d9d3..14bf6f7 100644 --- a/README.rst +++ b/README.rst @@ -25,7 +25,8 @@ the system based on the recent past. In these cases exponential statistics are used. Instead of weighting all values uniformly in the statistics computation, an exponential decay weight is applied to older values. The decay rate is configurable and provides a mechanism for balancing recent values with past -values. +values. The exponential weighting may be on a 'per data point' or 'per time +step' basis. The Python `RunStats`_ module was designed for these cases by providing classes for computing online summary statistics and online linear regression in a @@ -70,27 +71,32 @@ function: >>> help(runstats) # doctest: +SKIP >>> help(runstats.Statistics) # doctest: +SKIP >>> help(runstats.Regression) # doctest: +SKIP - >>> help(runstats.ExponentialStatistics) # doctest: +SKIP + >>> help(runstats.ExponentialMovingStatistics) # doctest: +SKIP + >>> help(runstats.ExponentialMovingCovariance) # doctest: +SKIP Tutorial -------- -The Python `RunStats`_ module provides three types for computing running -statistics: Statistics, ExponentialStatistics and Regression.The Regression -object leverages Statistics internally for its calculations. Each can be -initialized without arguments: +The Python `RunStats`_ module provides four types for computing running +statistics: Statistics, ExponentialMovingStatistics, +ExponentialMovingCovariance and Regression. +The Regression object leverages Statistics internally for its calculations +while ExponentialMovingCovariance uses ExponentialMovingStatistics. +Each can be initialized without arguments: .. code-block:: python - >>> from runstats import Statistics, Regression, ExponentialStatistics + >>> from runstats import Statistics, Regression, ExponentialMovingStatistics, ExponentialMovingCovariance >>> stats = Statistics() >>> regr = Regression() - >>> exp_stats = ExponentialStatistics() + >>> exp_stats = ExponentialMovingStatistics() + >>> exp_cov = ExponentialMovingCovariance() Statistics objects support four methods for modification. Use `push` to add -values to the summary, `clear` to reset the summary, sum to combine Statistics -summaries and multiply to weight summary Statistics by a scalar. +values to the summary, `clear` to reset the the object to its initialization +state, sum to combine Statistics summaries and multiply to weight summary +Statistics by a scalar. .. code-block:: python @@ -200,13 +206,13 @@ Both constructors accept an optional iterable that is consumed and pushed into the summary. Note that you may pass a generator as an iterable and the generator will be entirely consumed. -The ExponentialStatistics are constructed by providing a decay rate, initial -mean, and initial variance. The decay rate has default 0.9 and must be between -0 and 1. The initial mean and variance default to zero. +The ExponentialMovingStatistics are constructed by providing a decay rate, +initial mean, and initial variance. The decay rate defaults to 0.9 and must be +between 0 and 1. The initial mean and variance default to zero. .. code-block:: python - >>> exp_stats = ExponentialStatistics() + >>> exp_stats = ExponentialMovingStatistics() >>> exp_stats.decay 0.9 >>> exp_stats.mean() @@ -215,9 +221,9 @@ mean, and initial variance. The decay rate has default 0.9 and must be between 0.0 The decay rate is the weight by which the current statistics are discounted -by. Consequently, (1 - decay) is the weight of the new value. Like the `Statistics` class, -there are four methods for modification: `push`, `clear`, sum and -multiply. +by. Consequently, (1 - decay) is the weight of the new value. Like the +`Statistics` class, there are four methods for modification: `push`, `clear`, +sum and multiply. .. code-block:: python @@ -230,8 +236,8 @@ multiply. >>> exp_stats.stddev() 3.4049127627507683 -The decay of the exponential statistics can also be changed. The value must be -between 0 and 1. +The decay of the exponential statistics can also be changed during the lifetime +of the object. .. code-block:: python @@ -245,30 +251,18 @@ between 0 and 1. ... ValueError: decay must be between 0 and 1 -The clear method allows to optionally set a new mean, new variance and new -decay. If none are provided mean and variance reset to zero, while the decay is -not changed. +Combining `ExponentialMovingStatistics` is done by adding them together. The +mean and variance are simply added to create a new object. To weight each +`ExponentialMovingStatistics`, multiply them by a constant factor. +Note how this behaviour differs from the two previous classes. When two +`ExponentialMovingStatistics` are added the decay of the left object is used for +the new object. The clear method resets the object to its state at +construction. `len`, minimum and maximum are not supported. .. code-block:: python - >>> exp_stats.clear() - >>> exp_stats.decay - 0.5 - >>> exp_stats.mean() - 0.0 - >>> exp_stats.variance() - 0.0 - -Combining `ExponentialStatistics` is done by adding them together. The mean and -variance are simply added to create a new object. To weight each -`ExponentialStatistics`, multiply them by a constant factor. If two -`ExponentialStatistics` are added then the leftmost decay is used for the new -object. The `len` method is not supported. - -.. code-block:: python - - >>> alpha_stats = ExponentialStatistics(iterable=range(10)) - >>> beta_stats = ExponentialStatistics(decay=0.1) + >>> alpha_stats = ExponentialMovingStatistics(iterable=range(10)) + >>> beta_stats = ExponentialMovingStatistics(decay=0.1) >>> for num in range(10): ... beta_stats.push(num) >>> exp_stats = beta_stats * 0.5 + alpha_stats * 0.5 @@ -277,6 +271,100 @@ object. The `len` method is not supported. >>> exp_stats.mean() 6.187836645 +The `ExponentialMovingCovariance` works equivalently to +`ExponentialMovingStatistics`. + +.. code-block:: python + + >>> exp_cov = ExponentialMovingCovariance( + ... decay=0.9, + ... mean_x=0.0, + ... variance_x=0.0, + ... mean_y=0.0, + ... variance_y=0.0, + ... covariance=0.0, + ... iterable=(), + ... ) + >>> for num in range(10): + ... exp_cov.push(num, num + 5) + >>> round(exp_cov.covariance(), 2) + 17.67 + >>> round(exp_cov.correlation(), 2) + 0.96 + +`ExponentialMovingStatistics` can also work in a time-based mode i.e. old +statistics are not simply discounted by the decay rate each time a value is +pushed. Instead an effective decay rate is calculated based on the provided +'nominal' decay rate as well as the time difference between the last push and +the current push.`ExponentialMovingStatistics` operate in time based mode when +a `delay > 0` is provided at construction. The delay is the no. of seconds that +need to pass for the effective decay rate to be equal to the provided decay rate. +For example, if a delay of 60 and a decay of 0.9 is provided, then after 60 +seconds pass between calls to push() the effective decay rate for discounting +the old statistics equals 0.9, when 120 seconds pass than it equals +0.9 ** 2 = 0.81 and so on. The exact formula for calculating the effective +decay rate at a given call to push is: +`decay ** ((current_timestamp - timestamp_at_last_push) / delay)`. The initial +timestamp is the timestamp when delay has been set. + +.. code-block:: python + + >>> import time + >>> alpha_stats = ExponentialMovingStatistics(decay=0.9, delay=1) + >>> time.sleep(1) + >>> alpha_stats.push(100) + >>> round(alpha_stats.mean()) + 10 + >>> alpha_stats.clear() # note that clear() resets the timer as well + >>> time.sleep(2) + >>> alpha_stats.push(100) + >>> round(alpha_stats.mean()) + 19 + +There are a few things to note about an time_based +`ExponentialMovingStatistics` object: +- When providing an iterable at construction together with a delay, the iterable +is first processed in non-time based mode i.e. as if there would be no delay +- The delay can also be set after object construction. In this case the initial +timestamp is the time when the delay is set. If a non `None` delay is changed, +this does not effect the timer. Setting delay to `None` deactivates time based +mode. +- When two ExponentialMovingStatistics objects are added the state of the delay +is taken from the left object. If the left object is time-based (non `None` +delay) the timer is reset during an regular __add__ (a + b) for the resulting +object while it is not during an incremental add __iadd__ (a += b). +- The timer can be stopped with a call to `freeze()`. This can +be useful when saving the state of the object (`get_state()`) for later usage +or when serializing the object to pickle. +With a call to `unfreeze()` the timer continues where it left of (e.g. after +loading). +- Pushes onto a freezed object use a effective decay rate based on the time +difference between the last call to push and the moment `freeze()` was called. +- With a call to `clear_timer()` the timer can be reset. +- It is not recommended to use time based discounting for use cases that +require high precision on below seconds granularity. + +.. code-block:: python + + >>> alpha_stats = ExponentialMovingStatistics(decay=0.9, delay=1) + >>> time.sleep(1) + >>> alpha_stats.freeze() + >>> saved_state = alpha_stats.get_state() + >>> time.sleep(2) + >>> beta_stats = ExponentialMovingStatistics.fromstate(saved_state) + >>> beta_stats.push(10) + >>> round(beta_stats.mean()) + 1 + >>> beta_stats.unfreeze() + >>> time.sleep(1) + >>> beta_stats.push(10) + >>> round(beta_stats.mean()) + 3 + + +Sources +------- + All internal calculations of the Statistics and Regression classes are based entirely on the C++ code by John Cook as posted in a couple of articles: @@ -286,9 +374,15 @@ entirely on the C++ code by John Cook as posted in a couple of articles: .. _`Computing Skewness and Kurtosis in One Pass`: http://www.johndcook.com/blog/skewness_kurtosis/ .. _`Computing Linear Regression in One Pass`: http://www.johndcook.com/blog/running_regression/ -The ExponentialStatistics implementation is based on: +The ExponentialMovingStatistics implementation is based on: + +* `Finch, 2009, Incremental Calculation of Weighted Mean and Variance`_ + +.. _`Finch, 2009, Incremental Calculation of Weighted Mean and Variance`: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf + -* Finch, 2009, Incremental Calculation of Weighted Mean and Variance +Pure Python and Cython +---------------------- The pure-Python version of `RunStats`_ is directly available if preferred. diff --git a/docs/api.rst b/docs/api.rst index b32d5df..2d99e80 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -17,9 +17,9 @@ Regression :special-members: -ExponentialStatistics -..................... +ExponentialMovingStatistics +........................... -.. autoclass:: runstats.ExponentialStatistics +.. autoclass:: runstats.ExponentialMovingStatistics :members: :special-members: diff --git a/docs/conf.py b/docs/conf.py index e8b42ed..c6c3a4d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,9 +12,11 @@ import os import sys -sys.path.insert(0, os.path.abspath('..')) + import runstats +sys.path.insert(0, os.path.abspath('..')) + # -- Project information ----------------------------------------------------- @@ -89,7 +91,6 @@ ] } - # -- Options for todo extension ---------------------------------------------- # If true, `todo` and `todoList` produce output, else they produce nothing. diff --git a/runstats/__init__.py b/runstats/__init__.py index 824a3bc..1207779 100644 --- a/runstats/__init__.py +++ b/runstats/__init__.py @@ -7,11 +7,26 @@ """ try: - from ._core import ExponentialStatistics, Regression, Statistics + from ._core import ( + ExponentialMovingCovariance, + ExponentialMovingStatistics, + Regression, + Statistics, + ) except ImportError: # pragma: no cover - from .core import ExponentialStatistics, Regression, Statistics + from .core import ( + ExponentialMovingCovariance, + ExponentialMovingStatistics, + Regression, + Statistics, + ) -__all__ = ['Statistics', 'Regression', 'ExponentialStatistics'] +__all__ = [ + 'Statistics', + 'Regression', + 'ExponentialMovingStatistics', + 'ExponentialMovingCovariance', +] __title__ = 'runstats' __version__ = '2.0.0' __author__ = 'Grant Jenks' diff --git a/runstats/core.pxd b/runstats/core.pxd index cec8001..ea101c1 100644 --- a/runstats/core.pxd +++ b/runstats/core.pxd @@ -65,12 +65,18 @@ cdef class Statistics: cpdef Statistics make_statistics(state) -cdef class ExponentialStatistics: - cdef public double _decay, _mean, _variance +cdef class ExponentialMovingStatistics: + cdef public double _decay, _mean, _variance, _initial_mean, _initial_variance, _delay, _time_diff, _current_time cpdef _set_decay(self, double value) - cpdef clear(self, double mean=*, double variance=*, decay=*) + cpdef _set_delay(self, double value) + + cpdef clear(self) + + cpdef clear_timer(self) + + cpdef is_time_based(self) cpdef get_state(self) @@ -78,35 +84,53 @@ cdef class ExponentialStatistics: cpdef __reduce__(self) - cpdef ExponentialStatistics copy(self, _=*) + cpdef ExponentialMovingStatistics copy(self, _=*) + + cpdef freeze(self) + + cpdef unfreeze(self) @cython.locals( + freezed=bint + ) + cpdef is_freezed(self) + + @cython.locals( + decay=double, alpha=double, diff=double, - incr=double, + incr=double ) cpdef push(self, double value) + @cython.locals( + now=double, + diff=double, + norm_diff=double, + decay=double + ) + cpdef _effective_decay(self) + cpdef double mean(self) cpdef double variance(self) cpdef double stddev(self) - @cython.locals(sigma=ExponentialStatistics) - cpdef ExponentialStatistics _add(self, ExponentialStatistics that) + @cython.locals(sigma=ExponentialMovingStatistics) + cpdef ExponentialMovingStatistics _add(self, ExponentialMovingStatistics that) - cpdef ExponentialStatistics _iadd(self, ExponentialStatistics that) + cpdef ExponentialMovingStatistics _iadd(self, ExponentialMovingStatistics that) @cython.locals( - sigma=ExponentialStatistics, + sigma=ExponentialMovingStatistics, ) - cpdef ExponentialStatistics _mul(self, double that) + cpdef ExponentialMovingStatistics _mul(self, double that) - cpdef ExponentialStatistics _imul(self, double that) + cpdef ExponentialMovingStatistics _imul(self, double that) -cpdef ExponentialStatistics make_exponential_statistics(state) +cpdef ExponentialMovingStatistics make_exponential_statistics(state) cdef class Regression: @@ -148,3 +172,47 @@ cdef class Regression: cpdef Regression make_regression(state) + + +cdef class ExponentialMovingCovariance: + cdef public ExponentialMovingStatistics _xstats, _ystats + cdef public double _decay, _initial_covariance, _covariance + + cpdef _set_decay(self, double value) + + cpdef clear(self) + + cpdef get_state(self) + + cpdef set_state(self, state) + + cpdef __reduce__(self) + + cpdef ExponentialMovingCovariance copy(self, _=*) + + @cython.locals( + alpha=double + ) + cpdef push(self, double x_val, double y_val) + + cpdef double covariance(self) + + @cython.locals( + denom=double + ) + cpdef double correlation(self) + + @cython.locals(sigma=ExponentialMovingCovariance) + cpdef ExponentialMovingCovariance _add(self, ExponentialMovingCovariance that) + + cpdef ExponentialMovingCovariance _iadd(self, ExponentialMovingCovariance that) + + @cython.locals( + sigma=ExponentialMovingCovariance, + ) + cpdef ExponentialMovingCovariance _mul(self, double that) + + cpdef ExponentialMovingCovariance _imul(self, double that) + + +cpdef ExponentialMovingCovariance make_exponential_covariance(state) diff --git a/runstats/core.py b/runstats/core.py index bc9f14e..a5cbe6e 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -1,11 +1,15 @@ """Python RunStats -Compute Statistics, Exponential Statistics and Regression in a single pass. +Compute Statistics, Exponential Statistics, Regression and Exponential +Covariance in a single pass. """ from __future__ import division +import time +from math import isnan + NAN = float('nan') @@ -261,10 +265,11 @@ def make_statistics(state): return Statistics.fromstate(state) -class ExponentialStatistics: +class ExponentialMovingStatistics: + # pylint: disable=too-many-instance-attributes """Compute exponential mean and variance in a single pass. - ExponentialStatistics objects may also be copied. + ExponentialMovingStatistics objects may also be added and copied. Based on "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at @@ -275,25 +280,51 @@ class ExponentialStatistics: """ - def __init__(self, decay=0.9, mean=0.0, variance=0.0, iterable=()): - """Initialize ExponentialStatistics object. + def __init__( + self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=() + ): # pylint: disable=too-many-arguments + """Initialize ExponentialMovingStatistics object. Incrementally tracks mean and variance and exponentially discounts old values. Requires a `decay` rate in exclusive range (0, 1) for discounting - previous statistics. + previous statistics. Default 0.9 Optionally allows setting initial mean and variance. Default 0. Iterates optional parameter `iterable` and pushes each value into the statistics summary. + Can discount values based on time passed instead of position if 'delay' is + set. Setting delay (in seconds) computes a dynamic + decay rate each time a value is pushed: + dynamic_decay = decay ** (sec_from_last_push / delay). + When the first value x is pushed, sec_from_last_push is the difference + (in sec) between setting the delay from None to a value t (usually at + object construction) and the times when x is being pushed. + When freeze() has been called sec_from_last_push is the difference + between the last call to push() and the time freeze() has been + called(). + Note that at object initialization the values in iterable are weighted + as if delay has not been set. """ - self.clear(mean, variance, decay) + self.decay = decay + self._initial_mean = mean + self._initial_variance = variance + self._mean = self._initial_mean + self._variance = self._initial_variance + + # using float('nan') for cython compatibility + self._current_time = NAN + self._time_diff = NAN + self.delay = NAN + for value in iterable: self.push(value) + self.delay = delay + @property def decay(self): """Exponential decay rate of old values.""" @@ -304,16 +335,56 @@ def decay(self, value): self._set_decay(value) def _set_decay(self, value): - if not 0 <= value <= 1: + if not 0 < value < 1: raise ValueError('decay must be between 0 and 1') self._decay = value - def clear(self, mean=0.0, variance=0.0, decay=None): - """Clear ExponentialStatistics object.""" - self._mean = mean - self._variance = variance - if decay is not None: - self._set_decay(decay) + @property + def delay(self): + """Delay in sec for time based discounting""" + return self._delay + + @delay.setter + def delay(self, value): + value = NAN if value is None else value + self._set_delay(value) + + def _set_delay(self, value): + if not isnan(value): + if value <= 0.0: + raise ValueError('delay must be > 0') + self._current_time = ( + self._current_time + if not isnan(self._current_time) + else time.time() + ) + else: + self._current_time = NAN + self._time_diff = NAN + + self._delay = value + + def clear(self): + """Clear ExponentialMovingStatistics object.""" + self._mean = self._initial_mean + self._variance = self._initial_variance + self._current_time = time.time() if self.is_time_based() else NAN + self._time_diff = NAN + + def clear_timer(self): + """Reset time counter""" + if self.is_time_based(): + self._current_time = time.time() + self._time_diff = NAN + else: + raise AttributeError( + 'clear_timer on a non-time time based (i.e. delay == None) ' + 'ExponentialMovingStatistics object is illegal' + ) + + def is_time_based(self): + """Checks if object is time-based or not i.e. delay is set or None""" + return not isnan(self.delay) def __eq__(self, that): return self.get_state() == that.get_state() @@ -323,19 +394,38 @@ def __ne__(self, that): def get_state(self): """Get internal state.""" - return self._decay, self._mean, self._variance + state = [ + self._decay, + self._initial_mean, + self._initial_variance, + self._mean, + self._variance, + self._delay, + self._current_time, + self._time_diff, + ] + state = [None if isnan(i) else i for i in state] + return tuple(state) def set_state(self, state): """Set internal state.""" + state = list(state) + state = [NAN if i is None else i for i in state] + ( self._decay, + self._initial_mean, + self._initial_variance, self._mean, self._variance, + self._delay, + self._current_time, + self._time_diff, ) = state @classmethod def fromstate(cls, state): - """Return ExponentialStatistics object from state.""" + """Return ExponentialMovingStatistics object from state.""" stats = cls() stats.set_state(state) return stats @@ -344,23 +434,80 @@ def __reduce__(self): return make_exponential_statistics, (self.get_state(),) def copy(self, _=None): - """Copy ExponentialStatistics object.""" + """Copy ExponentialMovingStatistics object.""" return self.fromstate(self.get_state()) def __copy__(self, _=None): - """Copy ExponentialStatistics object.""" + """Copy ExponentialMovingStatistics object.""" return self.copy(_) __deepcopy__ = __copy__ + def freeze(self): + """Freeze time i.e. save the difference between now and the last push""" + if self.is_time_based(): + self._time_diff = time.time() - self._current_time + else: + raise AttributeError( + 'freeze on a non-time time based (i.e. delay == None) ' + 'ExponentialMovingStatistics object is illegal' + ) + + def unfreeze(self): + """Unfreeze time i.e. continue counting the time difference""" + if not self.is_time_based(): + raise AttributeError( + 'unfreeze on a non-time time based (i.e. delay == None) ' + 'ExponentialMovingStatistics object is illegal' + ) + + if not self.is_freezed(): + raise AttributeError( + 'Time must be freezed first before it can be unfreezed' + ) + + self._current_time = time.time() - self._time_diff + self._time_diff = NAN + + def is_freezed(self): + """Check if object is in a freezed state""" + if not self.is_time_based(): + raise AttributeError('Only time-based objects can be freezed') + + freezed = not isnan(self._time_diff) + return freezed + def push(self, value): - """Add `value` to the ExponentialStatistics summary.""" - alpha = 1.0 - self._decay + """Add `value` to the ExponentialMovingStatistics summary.""" + if self.is_time_based(): + decay = self._effective_decay() + else: + decay = self.decay + + alpha = 1.0 - decay diff = value - self._mean incr = alpha * diff - self._variance += alpha * (self._decay * diff ** 2 - self._variance) + self._variance += alpha * (decay * diff ** 2 - self._variance) self._mean += incr + def _effective_decay(self): + """Calculate effective decay rate for time based ExponentialMovingStatistics""" + if not self.is_time_based(): + raise AttributeError( + 'Forbidden to call _effective_decay on non-time based object' + ) + + now = time.time() + diff = ( + self._time_diff + if self.is_freezed() + else (now - self._current_time) + ) + norm_diff = diff / self.delay + decay = self.decay ** norm_diff + self._current_time = now + return decay + def mean(self): """Exponential mean of values.""" return self._mean @@ -374,40 +521,46 @@ def stddev(self): return self.variance() ** 0.5 def _add(self, that): - """Add two ExponentialStatistics objects together.""" + """Add two ExponentialMovingStatistics objects together.""" sigma = self.copy() sigma._iadd(that) + + if sigma.is_time_based(): + sigma.clear_timer() + return sigma def __add__(self, that): - """Add two ExponentialStatistics objects together.""" + """Add two ExponentialMovingStatistics objects together.""" return self._add(that) def _iadd(self, that): - """Add another ExponentialStatistics object to this one.""" + """Add another ExponentialMovingStatistics object to this one.""" self._mean += that.mean() self._variance += that.variance() return self def __iadd__(self, that): - """Add another ExponentialStatistics object to this one.""" + """Add another ExponentialMovingStatistics object to this one.""" return self._iadd(that) def _mul(self, that): - """Multiply by a scalar to change ExponentialStatistics weighting.""" + """Multiply by a scalar to change ExponentialMovingStatistics weighting.""" sigma = self.copy() sigma._imul(that) return sigma def __mul__(self, that): - """Multiply by a scalar to change ExponentialStatistics weighting.""" - if isinstance(self, ExponentialStatistics): + """Multiply by a scalar to change ExponentialMovingStatistics weighting.""" + if isinstance(self, ExponentialMovingStatistics): return self._mul(that) # https://stackoverflow.com/q/33218006/232571 return that._mul(self) # pragma: no cover + __rmul__ = __mul__ + def _imul(self, that): - """Multiply by a scalar to change ExponentialStatistics weighting + """Multiply by a scalar to change ExponentialMovingStatistics weighting in-place. """ @@ -416,7 +569,7 @@ def _imul(self, that): return self def __imul__(self, that): - """Multiply by a scalar to change ExponentialStatistics weighting + """Multiply by a scalar to change ExponentialMovingStatistics weighting in-place. """ @@ -424,8 +577,8 @@ def __imul__(self, that): def make_exponential_statistics(state): - """Make ExponentialStatistics object from state.""" - return ExponentialStatistics.fromstate(state) + """Make ExponentialMovingStatistics object from state.""" + return ExponentialMovingStatistics.fromstate(state) class Regression: @@ -573,3 +726,190 @@ def __iadd__(self, that): def make_regression(state): """Make Regression object from state.""" return Regression.fromstate(state) + + +class ExponentialMovingCovariance: + """Compute exponential moving covariance and correlation in a single pass. + + ExponentialMovingCovariance objects may also be added and copied. + + """ + + def __init__( + self, + decay=0.9, + mean_x=0.0, + variance_x=0.0, + mean_y=0.0, + variance_y=0.0, + covariance=0.0, + iterable=(), + ): # pylint: disable=too-many-arguments + """Initialize ExponentialMovingCovariance object. + + Incrementally tracks covariance and exponentially discounts old + values. + + Requires a `decay` rate in exclusive range (0, 1) for discounting + previous statistics. + + Optionally allows setting initial covariance. Default 0. + + Iterates optional parameter `iterable` and pushes each pair into the + statistics summary. + + """ + self._initial_covariance = covariance + self._covariance = self._initial_covariance + self._xstats = ExponentialMovingStatistics( + decay=decay, mean=mean_x, variance=variance_x + ) + self._ystats = ExponentialMovingStatistics( + decay=decay, mean=mean_y, variance=variance_y + ) + self.decay = decay + + for x_val, y_val in iterable: + self.push(x_val, y_val) + + @property + def decay(self): + """Decay rate for old values.""" + return self._decay + + @decay.setter + def decay(self, value): + self._set_decay(value) + + def _set_decay(self, value): + self._xstats.decay = value + self._ystats.decay = value + self._decay = value + + def clear(self): + """Clear ExponentialMovingCovariance object.""" + self._xstats.clear() + self._ystats.clear() + self._covariance = self._initial_covariance + + def __eq__(self, that): + return self.get_state() == that.get_state() + + def __ne__(self, that): + return self.get_state() != that.get_state() + + def get_state(self): + """Get internal state.""" + return ( + self._decay, + self._initial_covariance, + self._covariance, + self._xstats.get_state(), + self._ystats.get_state(), + ) + + def set_state(self, state): + """Set internal state.""" + decay, initial_covariance, covariance, xstate, ystate = state + self._decay = decay + self._initial_covariance = initial_covariance + self._covariance = covariance + self._xstats.set_state(xstate) + self._ystats.set_state(ystate) + + @classmethod + def fromstate(cls, state): + """Return ExponentialMovingCovariance object from state.""" + stats = cls() + stats.set_state(state) + return stats + + def __reduce__(self): + return make_exponential_covariance, (self.get_state(),) + + def copy(self, _=None): + """Copy ExponentialMovingCovariance object.""" + return self.fromstate(self.get_state()) + + def __copy__(self, _=None): + """Copy ExponentialMovingCovariance object.""" + return self.copy(_) + + __deepcopy__ = __copy__ + + def push(self, x_val, y_val): + """Add a pair `(x, y)` to the ExponentialMovingCovariance summary.""" + self._xstats.push(x_val) + alpha = 1.0 - self.decay + self._covariance = self.decay * self.covariance() + alpha * ( + x_val - self._xstats.mean() + ) * (y_val - self._ystats.mean()) + self._ystats.push(y_val) + + def covariance(self): + """Covariance of values""" + return self._covariance + + def correlation(self): + """Correlation of values""" + denom = self._xstats.stddev() * self._ystats.stddev() + return self.covariance() / denom + + def _add(self, that): + """Add two ExponentialMovingCovariance objects together.""" + sigma = self.copy() + sigma._iadd(that) + return sigma + + def __add__(self, that): + """Add two ExponentialMovingCovariance objects together.""" + return self._add(that) + + def _iadd(self, that): + """Add another ExponentialMovingCovariance object to this one.""" + self._xstats += that._xstats + self._ystats += that._ystats + self._covariance += that.covariance() + return self + + def __iadd__(self, that): + """Add another ExponentialMovingCovariance object to this one.""" + return self._iadd(that) + + def _mul(self, that): + """Multiply by a scalar to change ExponentialMovingCovariance weighting.""" + sigma = self.copy() + sigma._imul(that) + return sigma + + def __mul__(self, that): + """Multiply by a scalar to change ExponentialMovingCovariance weighting.""" + if isinstance(self, ExponentialMovingCovariance): + return self._mul(that) + # https://stackoverflow.com/q/33218006/232571 + return that._mul(self) # pragma: no cover + + __rmul__ = __mul__ + + def _imul(self, that): + """Multiply by a scalar to change ExponentialMovingCovariance weighting + in-place. + + """ + that = float(that) + self._xstats *= that + self._ystats *= that + self._covariance *= that + return self + + def __imul__(self, that): + """Multiply by a scalar to change ExponentialMovingCovariance weighting + in-place. + + """ + return self._imul(that) + + +def make_exponential_covariance(state): + """Make ExponentialMovingCovariance object from state.""" + return ExponentialMovingCovariance.fromstate(state) diff --git a/tests/__main__.py b/tests/__main__.py index 142f9b5..551d32b 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -2,14 +2,27 @@ import sys -from runstats import ExponentialStatistics as FastExponentialStatistics +from runstats import ExponentialMovingCovariance as FastExponentialCoveriance +from runstats import ExponentialMovingStatistics as FastExponentialStatistics from runstats import Regression as FastRegression from runstats import Statistics as FastStatistics -from runstats.core import ExponentialStatistics as CoreExponentialStatistics +from runstats.core import ( + ExponentialMovingCovariance as CoreExponentialCoveriance, +) +from runstats.core import ( + ExponentialMovingStatistics as CoreExponentialStatistics, +) from runstats.core import Regression as CoreRegression from runstats.core import Statistics as CoreStatistics - -from .test_runstats import kurtosis, mean, skewness, stddev, variance +from tests.test_runstats import ( + exp_cov_cor, + exp_mean_var, + kurtosis, + mean, + skewness, + stddev, + variance, +) def main(): @@ -23,6 +36,14 @@ def main(): print('Skewness:', skewness(args)) print('Kurtosis:', kurtosis(args)) + exp_mean, exp_var = exp_mean_var(0.9, args) + exp_cov, exp_cor = exp_cov_cor(0.9, enumerate(args, 1)) + print('Exponential Moving Mean (decay=0.9):', exp_mean) + print('Exponential Moving Variance (decay=0.9):', exp_var) + print('Exponential Moving StdDev (decay=0.9):', exp_var ** 0.5) + print('Exponential Moving Covariance (decay=0.9):', exp_cov) + print('Exponential Moving Correlation (decay=0.9):', exp_cor) + fast_stats = FastStatistics() for arg in args: @@ -57,8 +78,8 @@ def main(): fast_exp_stats.push(arg) print() - print('FastExponentialStatistics') - print('Decay Rate (default):', fast_exp_stats.get_decay()) + print('FastExponentialMovingStatistics') + print('Decay Rate (default):', fast_exp_stats.decay) print('Exponential Mean:', fast_exp_stats.mean()) print('Exponential Variance:', fast_exp_stats.variance()) print('Exponential StdDev:', fast_exp_stats.stddev()) @@ -69,8 +90,8 @@ def main(): core_exp_stats.push(arg) print() - print('CoreExponentialStatistics') - print('Decay Rate (default):', core_exp_stats.get_decay()) + print('CoreExponentialMovingStatistics') + print('Decay Rate (default):', core_exp_stats.decay) print('Exponential Mean:', core_exp_stats.mean()) print('Exponential Variance:', core_exp_stats.variance()) print('Exponential StdDev:', core_exp_stats.stddev()) @@ -99,6 +120,28 @@ def main(): print('Intercept:', core_regr.intercept()) print('Correlation:', core_regr.correlation()) + fast_exp_cov = FastExponentialCoveriance() + + for index, arg in enumerate(args, 1): + fast_exp_cov.push(index, arg) + + print() + print('FastExponentialCovariance') + print('Decay Rate (default):', fast_exp_cov.decay) + print('Exponential Moving Covariance:', fast_exp_cov.covariance()) + print('Exponential Moving Correlation:', fast_exp_cov.correlation()) + + core_exp_cov = CoreExponentialCoveriance() + + for index, arg in enumerate(args, 1): + core_exp_cov.push(index, arg) + + print() + print('CoreExponentialCovariance') + print('Decay Rate (default):', core_exp_cov.decay) + print('Exponential Moving Covariance:', core_exp_cov.covariance()) + print('Exponential Moving Correlation:', core_exp_cov.correlation()) + if __name__ == '__main__': main() diff --git a/tests/benchmark.py b/tests/benchmark.py index b0bb7d4..95ac94f 100644 --- a/tests/benchmark.py +++ b/tests/benchmark.py @@ -44,8 +44,8 @@ def main(): core_exp_stats = timeit.repeat( setup=''' from __main__ import VALUES -from runstats.core import ExponentialStatistics -exp_stats = ExponentialStatistics() +from runstats.core import ExponentialMovingStatistics +exp_stats = ExponentialMovingStatistics() ''', stmt=''' for value in VALUES: @@ -59,8 +59,8 @@ def main(): fast_exp_stats = timeit.repeat( setup=''' from __main__ import VALUES -from runstats._core import ExponentialStatistics -exp_stats = ExponentialStatistics() +from runstats._core import ExponentialMovingStatistics +exp_stats = ExponentialMovingStatistics() ''', stmt=''' for value in VALUES: @@ -105,18 +105,54 @@ def main(): speedup_regr = core_regr / fast_regr - 1 + core_exp_cov = timeit.repeat( + setup=''' +from __main__ import PAIRS +from runstats.core import ExponentialMovingCovariance +exp_cov = ExponentialMovingCovariance() + ''', + stmt=''' +for pos, val in PAIRS: + exp_cov.push(pos, val) +exp_cov.covariance() + ''', + number=1, + repeat=7, + )[2] + + fast_exp_cov = timeit.repeat( + setup=''' +from __main__ import PAIRS +from runstats._core import ExponentialMovingCovariance +exp_cov = ExponentialMovingCovariance() + ''', + stmt=''' +for pos, val in PAIRS: + exp_cov.push(pos, val) +exp_cov.covariance() + ''', + number=1, + repeat=7, + )[2] + + speedup_exp_cov = core_exp_cov / fast_exp_cov - 1 + print('core.Statistics:', core_stats) print('_core.Statistics:', fast_stats) print(' Stats Speedup: %.2fx faster' % speedup_stats) - print('core.ExponentialStatistics:', core_exp_stats) - print('_core.ExponentialStatistics:', fast_exp_stats) + print('core.ExponentialMovingStatistics:', core_exp_stats) + print('_core.ExponentialMovingStatistics:', fast_exp_stats) print(' ExpStats Speedup: %.2fx faster' % speedup_exp_stats) print('core.Regression:', core_regr) print('_core.Regression:', fast_regr) print(' Regr Speedup: %.2fx faster' % speedup_regr) + print('core.ExponentialMovingCovariance:', core_exp_cov) + print('_core.ExponentialMovingCovariance:', fast_exp_cov) + print(' ExpCov Speedup: %.2fx faster' % speedup_exp_cov) + if __name__ == '__main__': main() diff --git a/tests/test_runstats.py b/tests/test_runstats.py index a49aa7f..63ccc42 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -3,16 +3,24 @@ """ import copy +import itertools import math import pickle import random +from unittest.mock import patch import pytest -from runstats import ExponentialStatistics as FastExponentialStatistics +from runstats import ExponentialMovingCovariance as FastExponentialCovariance +from runstats import ExponentialMovingStatistics as FastExponentialStatistics from runstats import Regression as FastRegression from runstats import Statistics as FastStatistics -from runstats.core import ExponentialStatistics as CoreExponentialStatistics +from runstats.core import ( + ExponentialMovingCovariance as CoreExponentialCovariance, +) +from runstats.core import ( + ExponentialMovingStatistics as CoreExponentialStatistics, +) from runstats.core import Regression as CoreRegression from runstats.core import Statistics as CoreStatistics @@ -50,10 +58,92 @@ def kurtosis(values): return (numerator / denominator) - 3 +def covariance(values): + values = list(values) + x_vals = [x for x, y in values] + y_vals = [y for x, y in values] + mean_x = mean(x_vals) + mean_y = mean(y_vals) + return sum((x - mean_x) * (y - mean_y) for x, y in values) / len(values) + + +def correlation(values): + sigma_x = sum(xxx for xxx, yyy in values) / len(values) + sigma_y = sum(yyy for xxx, yyy in values) / len(values) + sigma_xy = sum(xxx * yyy for xxx, yyy in values) / len(values) + sigma_x2 = sum(xxx ** 2 for xxx, yyy in values) / len(values) + sigma_y2 = sum(yyy ** 2 for xxx, yyy in values) / len(values) + return (sigma_xy - sigma_x * sigma_y) / ( + ((sigma_x2 - sigma_x ** 2) * (sigma_y2 - sigma_y ** 2)) ** 0.5 + ) + + +def exponential_weight(decay, pos): + return (1 - decay) * decay ** pos + + +def exp_mean_var(decay, iterable): + indecies = list(range(len(iterable))) + weights = list(map(lambda x: exponential_weight(decay, x), indecies))[::-1] + + mean = 0.0 + for val, weight in zip(iterable, weights): + mean += val * weight + + variance = (0 - mean) ** 2 * (1 - sum(weights)) + for val, weight in zip(iterable, weights): + variance += (val - mean) ** 2 * weight + + return mean, variance + + +def exp_cov_cor(decay, iterable): + lst = list(iterable) + indecies = list(range(len(lst))) + weights = list(map(lambda x: exponential_weight(decay, x), indecies))[::-1] + + mean_1 = 0.0 + mean_2 = 0.0 + for vals, weight in zip(lst, weights): + x_1, x_2 = vals + mean_1 += x_1 * weight + mean_2 += x_2 * weight + + variance_1 = (0 - mean_1) ** 2 * (1 - sum(weights)) + variance_2 = (0 - mean_2) ** 2 * (1 - sum(weights)) + for vals, weight in zip(lst, weights): + x_1, x_2 = vals + variance_1 += (x_1 - mean_1) ** 2 * weight + variance_2 += (x_2 - mean_2) ** 2 * weight + + covar = (0 - mean_1) * (0 - mean_2) * (1 - sum(weights)) + for vals, weight in zip(lst, weights): + x_1, x_2 = vals + covar += (x_1 - mean_1) * (x_2 - mean_2) * weight + + correlation = covar / (variance_1 * variance_2) ** 0.5 + + return covar, correlation + + def error(value, test): return abs((test - value) / value) +def calc_effective_decay(diff, delay, nominal_decay): + norm_diff = diff / delay + eff_decay = nominal_decay ** norm_diff + return eff_decay + + +def get_time_patch(ExponentialMovingStatistics): + module = ExponentialMovingStatistics.__module__ + core = '_core' if module == 'runstats._core' else 'core' + patch_path = f'runstats.{core}.time' + time_patch = patch(patch_path) + return time_patch + + @pytest.mark.parametrize( 'Statistics,Regression', [ @@ -131,16 +221,16 @@ def test_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_exponential_statistics(ExponentialStatistics): +def test_exponential_statistics(ExponentialMovingStatistics): random.seed(0) alpha = [random.random() for _ in range(count)] big_alpha = [random.random() for _ in range(count * 100)] - alpha_exp_stats_zero = ExponentialStatistics(0.9999) - alpha_exp_stats_init = ExponentialStatistics( + alpha_exp_stats_zero = ExponentialMovingStatistics(0.9999) + alpha_exp_stats_init = ExponentialMovingStatistics( decay=0.9999, mean=mean(alpha), variance=variance(alpha, 0), @@ -163,9 +253,8 @@ def test_exponential_statistics(ExponentialStatistics): alpha_exp_stats_zero.clear() alpha_exp_stats_zero.decay = 0.1 - alpha_exp_stats_init.clear( - decay=0.1, mean=mean(alpha), variance=variance(alpha, 0) - ) + alpha_exp_stats_init.clear() + alpha_exp_stats_init.decay = 0.1 for val in big_alpha: alpha_exp_stats_zero.push(val) @@ -183,9 +272,9 @@ def test_exponential_statistics(ExponentialStatistics): < limit ) - alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha) + alpha_exp_stats = ExponentialMovingStatistics(0.1, iterable=alpha) beta = [random.random() * 2 for _ in range(count)] - beta_exp_stats = ExponentialStatistics(0.1) + beta_exp_stats = ExponentialMovingStatistics(0.1) assert alpha_exp_stats != beta_exp_stats @@ -219,16 +308,81 @@ def test_exponential_statistics(ExponentialStatistics): assert (error(current_mean, alpha_exp_stats.mean())) > limit assert (error(current_variance, alpha_exp_stats.variance())) > limit + # test time based to calculate correct mean/variance + time_patch = get_time_patch(ExponentialMovingStatistics) + with time_patch as time_mock: + delay = 0.5 + nominal_decay = 0.9 + + past = 100.0 + time_mock.time.return_value = past + gamma_exp_stats = ExponentialMovingStatistics() + exp_stats_time = ExponentialMovingStatistics(delay=0.5) + now = 110.55 + time_mock.time.return_value = now + exp_stats_time.push(10) + effective_decay = calc_effective_decay( + now - past, delay, nominal_decay + ) + gamma_exp_stats.decay = effective_decay + gamma_exp_stats.push(10) + + assert gamma_exp_stats.mean() == exp_stats_time.mean() + assert gamma_exp_stats.variance() == exp_stats_time.variance() + @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_bad_decay(ExponentialStatistics): +def test_bad_decay(ExponentialMovingStatistics): with pytest.raises(ValueError): - ExponentialStatistics(decay=2.0) + ExponentialMovingStatistics(decay=2.0) with pytest.raises(ValueError): - ExponentialStatistics(decay=-1.0) + ExponentialMovingStatistics(decay=-1.0) + + +@pytest.mark.parametrize( + 'ExponentialMovingCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_exponential_covariance(ExponentialMovingCovariance): + random.seed(0) + alpha = [random.random() for _ in range(count)] + beta = [x * -10 for x in alpha] + big_alpha = [random.random() for _ in range(count * 100)] + big_beta = [x * -10 for x in big_alpha] + data = list(zip(big_alpha, big_beta)) + + exp_cov = ExponentialMovingCovariance( + decay=0.9999, + mean_x=mean(alpha), + variance_x=variance(alpha, 0), + mean_y=mean(beta), + variance_y=variance(beta), + covariance=covariance(data), + ) + + for x, y in zip(big_alpha, big_beta): + exp_cov.push(x, y) + + assert error(covariance(data), exp_cov.covariance()) < limit + assert error(-1.0, exp_cov.correlation()) < limit + + exp_cov_2 = exp_cov.copy() + assert exp_cov == exp_cov_2 + assert exp_cov.covariance() != covariance(data) + exp_cov.clear() + assert exp_cov != exp_cov_2 + assert exp_cov.covariance() == covariance(data) + exp_cov_2.clear() + exp_cov.decay = 0.1 + exp_cov_2.decay = 0.1 + assert exp_cov.decay == 0.1 + assert exp_cov == exp_cov_2 + + exp_cov_3 = exp_cov * 0.5 + exp_cov * 0.5 + assert exp_cov_3 == exp_cov @pytest.mark.parametrize( @@ -247,12 +401,12 @@ def test_add_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_add_exponential_statistics(ExponentialStatistics): - exp_stats0 = ExponentialStatistics(0.9) - exp_stats10 = ExponentialStatistics(0.9, iterable=range(10)) +def test_add_exponential_statistics(ExponentialMovingStatistics): + exp_stats0 = ExponentialMovingStatistics(0.9) + exp_stats10 = ExponentialMovingStatistics(0.9, iterable=range(10)) assert (exp_stats0 + exp_stats10) == exp_stats10 assert (exp_stats10 + exp_stats0) == exp_stats10 exp_stats0 += exp_stats10 @@ -262,16 +416,37 @@ def test_add_exponential_statistics(ExponentialStatistics): with pytest.raises(TypeError): object() * exp_stats0 + exp_stats0.decay = 0.8 + exp_stats0.delay = 60 + exp_stats10.delay = 120 + # To check if clear_timer was called for add and not for iadd + exp_stats0._time_diff = -1 -def correlation(values): - sigma_x = sum(xxx for xxx, yyy in values) / len(values) - sigma_y = sum(yyy for xxx, yyy in values) / len(values) - sigma_xy = sum(xxx * yyy for xxx, yyy in values) / len(values) - sigma_x2 = sum(xxx ** 2 for xxx, yyy in values) / len(values) - sigma_y2 = sum(yyy ** 2 for xxx, yyy in values) / len(values) - return (sigma_xy - sigma_x * sigma_y) / ( - ((sigma_x2 - sigma_x ** 2) * (sigma_y2 - sigma_y ** 2)) ** 0.5 + exp_stats = exp_stats0 + exp_stats10 + assert exp_stats.delay == exp_stats0.delay != exp_stats10.delay + assert exp_stats.decay == exp_stats0.decay != exp_stats10.decay + assert math.isnan(exp_stats._time_diff) + + exp_stats0 += exp_stats10 + assert exp_stats0.decay == 0.8 + assert exp_stats0.delay == 60 + assert exp_stats0._time_diff == -1 + + +@pytest.mark.parametrize( + 'ExponentialMovingCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_add_exponential_covariance(ExponentialMovingCovariance): + exp_cov0 = ExponentialMovingCovariance(0.9) + exp_cov10 = ExponentialMovingCovariance( + 0.9, iterable=zip(range(10), range(10)) ) + assert (exp_cov0 + exp_cov10) == exp_cov10 + assert (exp_cov10 + exp_cov0) == exp_cov10 + + exp_cov0 += exp_cov10 + assert exp_cov0 == exp_cov10 @pytest.mark.parametrize( @@ -365,16 +540,16 @@ def test_get_set_state_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_get_set_state_exponential_statistics(ExponentialStatistics): +def test_get_set_state_exponential_statistics(ExponentialMovingStatistics): random.seed(0) vals = [random.random() for _ in range(count)] - exp_stats = ExponentialStatistics(iterable=vals) + exp_stats = ExponentialMovingStatistics(iterable=vals) exp_state = exp_stats.get_state() - new_exp_stats = ExponentialStatistics(0.8) + new_exp_stats = ExponentialMovingStatistics(0.8) assert exp_stats != new_exp_stats assert new_exp_stats.decay == 0.8 new_exp_stats.set_state(exp_state) @@ -386,7 +561,50 @@ def test_get_set_state_exponential_statistics(ExponentialStatistics): assert exp_stats.variance() == new_exp_stats.variance() assert new_exp_stats.decay == 0.1 - assert exp_stats == ExponentialStatistics.fromstate(exp_stats.get_state()) + assert exp_stats == ExponentialMovingStatistics.fromstate( + exp_stats.get_state() + ) + + new_exp_stats.decay = 0.9 + assert exp_stats == new_exp_stats + exp_stats.delay = 60 + assert exp_stats != new_exp_stats + exp_stats.freeze() + + assert exp_stats == ExponentialMovingStatistics.fromstate( + exp_stats.get_state() + ) + + exp_stats.unfreeze() + assert exp_stats == ExponentialMovingStatistics.fromstate( + exp_stats.get_state() + ) + + +@pytest.mark.parametrize( + 'ExponentialMovingCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_get_set_state_exponential_covariance(ExponentialMovingCovariance): + random.seed(0) + vals = [(random.random(), random.random()) for _ in range(count)] + exp_cov = ExponentialMovingCovariance(iterable=vals) + exp_state = exp_cov.get_state() + + new_exp_cov = ExponentialMovingCovariance(0.8) + assert exp_cov != new_exp_cov + assert new_exp_cov.decay == 0.8 + new_exp_cov.set_state(exp_state) + assert new_exp_cov.decay == 0.9 + assert exp_cov == new_exp_cov + new_exp_cov.decay = 0.1 + assert exp_cov != new_exp_cov + assert exp_cov.covariance() == new_exp_cov.covariance() + assert new_exp_cov.decay == 0.1 + + assert exp_cov == ExponentialMovingCovariance.fromstate( + exp_cov.get_state() + ) @pytest.mark.parametrize( @@ -440,17 +658,50 @@ def test_pickle_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_pickle_exponential_statistics(ExponentialStatistics): - exp_stats = ExponentialStatistics(0.9, iterable=range(10)) +def test_pickle_exponential_statistics(ExponentialMovingStatistics): + exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10)) for num in range(pickle.HIGHEST_PROTOCOL): pickled_exp_stats = pickle.dumps(exp_stats, protocol=num) unpickled_exp_stats = pickle.loads(pickled_exp_stats) assert exp_stats == unpickled_exp_stats, 'protocol: %s' % num +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_pickle_exponential_statistics_time_based(ExponentialMovingStatistics): + exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10), delay=30) + exp_stats.freeze() + for num in range(pickle.HIGHEST_PROTOCOL): + pickled_exp_stats = pickle.dumps(exp_stats, protocol=num) + unpickled_exp_stats = pickle.loads(pickled_exp_stats) + assert exp_stats == unpickled_exp_stats, 'protocol: %s' % num + + exp_stats.unfreeze() + for num in range(pickle.HIGHEST_PROTOCOL): + pickled_exp_stats = pickle.dumps(exp_stats, protocol=num) + unpickled_exp_stats = pickle.loads(pickled_exp_stats) + assert exp_stats == unpickled_exp_stats, 'protocol: %s' % num + + +@pytest.mark.parametrize( + 'ExponentialMovingCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_pickle_exponential_covariance(ExponentialMovingCovariance): + exp_cov = ExponentialMovingCovariance( + 0.9, iterable=zip(range(10), range(10)) + ) + for num in range(pickle.HIGHEST_PROTOCOL): + pickled_exp_cov = pickle.dumps(exp_cov, protocol=num) + unpickled_exp_cov = pickle.loads(pickled_exp_cov) + assert exp_cov == unpickled_exp_cov, 'protocol: %s' % num + + @pytest.mark.parametrize( 'Statistics,Regression', [ @@ -482,17 +733,32 @@ def test_copy_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_copy_exponential_statistics(ExponentialStatistics): - exp_stats = ExponentialStatistics(0.9, iterable=range(10)) +def test_copy_exponential_statistics(ExponentialMovingStatistics): + exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10), delay=30) + exp_stats.freeze() copy_exp_stats = copy.copy(exp_stats) assert exp_stats == copy_exp_stats deepcopy_exp_stats = copy.deepcopy(exp_stats) assert exp_stats == deepcopy_exp_stats +@pytest.mark.parametrize( + 'ExponentialMovingCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_copy_exponential_covariance(ExponentialMovingCovariance): + exp_cov = ExponentialMovingCovariance( + 0.9, iterable=zip(range(10), range(10)) + ) + copy_exp_cov = copy.copy(exp_cov) + assert exp_cov == copy_exp_cov + deepcopy_exp_cov = copy.deepcopy(exp_cov) + assert exp_cov == deepcopy_exp_cov + + @pytest.mark.parametrize( 'Statistics,Regression', [ @@ -524,15 +790,31 @@ def test_equality_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_equality_exponential_statistics(ExponentialStatistics): - exp_stats1 = ExponentialStatistics(0.9, iterable=range(10)) - exp_stats2 = ExponentialStatistics(0.9, iterable=range(10)) - assert exp_stats1 == exp_stats2 +def test_equality_exponential_statistics(ExponentialMovingStatistics): + exp_stats1 = ExponentialMovingStatistics(0.9, iterable=range(10)) + exp_stats2 = ExponentialMovingStatistics(0.9, iterable=range(10)) + exp_stats3 = ExponentialMovingStatistics(0.9, iterable=range(10), delay=30) + assert exp_stats1 == exp_stats2 != exp_stats3 exp_stats2.push(42) assert exp_stats1 != exp_stats2 + exp_stats3.freeze() + exp_stats3.delay = None + assert exp_stats1 == exp_stats3 + + +@pytest.mark.parametrize( + 'ExponentialMovingCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_equality_exponential_covariance(ExponentialMovingCovariance): + exp_cov1 = ExponentialMovingCovariance(0.9, iterable=enumerate(range(10))) + exp_cov2 = ExponentialMovingCovariance(0.9, iterable=enumerate(range(10))) + assert exp_cov1 == exp_cov2 + exp_cov2.push(42, 42) + assert exp_cov1 != exp_cov2 @pytest.mark.parametrize( @@ -615,17 +897,20 @@ def test_multiply(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_expoential_batch(ExponentialStatistics): +def test_exponential_statistics_batch(ExponentialMovingStatistics): random.seed(0) alpha = [random.random() for _ in range(count)] beta = [random.random() * 2 for _ in range(count)] - alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha) - beta_exp_stats = ExponentialStatistics(0.9, iterable=beta) + alpha_exp_stats = ExponentialMovingStatistics(0.1, iterable=alpha) + + assert (alpha_exp_stats * 0.5 + alpha_exp_stats * 0.5) == alpha_exp_stats + + beta_exp_stats = ExponentialMovingStatistics(0.9, iterable=beta) gamma_exp_stats = alpha_exp_stats * 0.3 + beta_exp_stats * 0.7 @@ -638,3 +923,331 @@ def test_expoential_batch(ExponentialStatistics): assert weighted_var == gamma_exp_stats.variance() assert alpha_exp_stats._decay == gamma_exp_stats._decay assert beta_exp_stats._decay != gamma_exp_stats._decay + + +@pytest.mark.parametrize( + 'ExponentialMovingCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_exponential_covariance_batch(ExponentialMovingCovariance): + random.seed(0) + + alpha = [(random.random(), random.random()) for _ in range(count)] + beta = [(random.random() * 2, random.random() * 2) for _ in range(count)] + + alpha_exp_cov = ExponentialMovingCovariance(0.1, iterable=alpha) + + assert (alpha_exp_cov * 0.5 + alpha_exp_cov * 0.5) == alpha_exp_cov + + beta_exp_cov = ExponentialMovingCovariance(0.9, iterable=beta) + + gamma_exp_cov = alpha_exp_cov * 0.3 + beta_exp_cov * 0.7 + + weighted_cov = ( + alpha_exp_cov.covariance() * 0.3 + beta_exp_cov.covariance() * 0.7 + ) + assert weighted_cov == gamma_exp_cov.covariance() + + assert alpha_exp_cov._decay == gamma_exp_cov._decay + assert beta_exp_cov._decay != gamma_exp_cov._decay + + alpha_exp_cov *= 0.3 + beta_exp_cov *= 0.7 + assert (alpha_exp_cov + beta_exp_cov) == gamma_exp_cov + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics, decay', + list( + itertools.product( + [CoreExponentialStatistics, FastExponentialStatistics], + [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99], + ) + ), +) +def test_exponential_statistics_decays(ExponentialMovingStatistics, decay): + random.seed(0) + alpha = [random.random() for _ in range(count)] + exp_stats = ExponentialMovingStatistics(decay=decay, iterable=alpha) + true_mean, true_variance = exp_mean_var(decay=decay, iterable=alpha) + + assert (error(true_mean, exp_stats.mean())) < limit + assert (error(true_mean, exp_stats.mean())) < limit + + +@pytest.mark.parametrize( + 'ExponentialMovingCovariance, decay', + list( + itertools.product( + [CoreExponentialCovariance, FastExponentialCovariance], + [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99], + ) + ), +) +def test_exponential_covariance_decays(ExponentialMovingCovariance, decay): + random.seed(0) + alpha = [(random.random(), random.random()) for _ in range(count)] + exp_stats = ExponentialMovingCovariance(decay=decay, iterable=alpha) + true_cov, true_cor = exp_cov_cor(decay=decay, iterable=alpha) + + assert (error(true_cov, exp_stats.covariance())) < limit + assert (error(true_cor, exp_stats.correlation())) < limit + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_exponential_statistics_clear(ExponentialMovingStatistics): + random.seed(0) + alpha = [random.random() for _ in range(count)] + mean = 10 + variance = 100 + exp_stats = ExponentialMovingStatistics(mean=mean, variance=variance) + + for val in alpha: + exp_stats.push(val) + + assert exp_stats.mean() != mean + assert exp_stats.variance() != variance + assert math.isnan(exp_stats._current_time) + assert math.isnan(exp_stats._time_diff) + exp_stats.clear() + assert exp_stats.mean() == mean + assert exp_stats.variance() == variance + assert math.isnan(exp_stats._current_time) + assert math.isnan(exp_stats._time_diff) + + time_patch = get_time_patch(ExponentialMovingStatistics) + with time_patch as time_mock: + time_mock.time.return_value = 1000.1 + + exp_stats.delay = 60 + assert exp_stats._current_time == 1000.1 + assert math.isnan(exp_stats._time_diff) + exp_stats.freeze() + assert not math.isnan(exp_stats._time_diff) + time_mock.time.return_value = 5000.5 + exp_stats.clear() + assert exp_stats._current_time == 5000.5 + assert math.isnan(exp_stats._time_diff) + exp_stats.freeze() + time_mock.time.return_value = 100.358 + exp_stats.clear_timer() + assert exp_stats._current_time == 100.358 + assert math.isnan(exp_stats._time_diff) + + +@pytest.mark.parametrize( + 'ExponentialMovingCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_exponential_covariance_clear(ExponentialMovingCovariance): + random.seed(0) + alpha = [(random.random(), random.random()) for _ in range(count)] + mean_x = 10 + variance_x = 100 + mean_y = 1000 + variance_y = 2 + covariance = 20 + exp_cov = ExponentialMovingCovariance( + mean_x=mean_x, + variance_x=variance_x, + mean_y=mean_y, + variance_y=variance_y, + covariance=covariance, + ) + + for x, y in alpha: + exp_cov.push(x, y) + + assert exp_cov.covariance() != covariance + assert exp_cov._xstats.mean() != mean_x + assert exp_cov._xstats.variance() != variance_x + assert exp_cov._ystats.mean() != mean_y + assert exp_cov._ystats.variance() != variance_y + exp_cov.clear() + assert exp_cov.covariance() == covariance + assert exp_cov._xstats.mean() == mean_x + assert exp_cov._xstats.variance() == variance_x + assert exp_cov._ystats.mean() == mean_y + assert exp_cov._ystats.variance() == variance_y + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_exponential_statistics_is_time(ExponentialMovingStatistics): + exp_stats = ExponentialMovingStatistics() + assert not exp_stats.is_time_based() + assert math.isnan(exp_stats.delay) + assert math.isnan(exp_stats._current_time) + assert math.isnan(exp_stats._time_diff) + exp_stats.delay = 30 + assert exp_stats.is_time_based() + assert not math.isnan(exp_stats.delay) + assert not math.isnan(exp_stats._current_time) + assert math.isnan(exp_stats._time_diff) + exp_stats = ExponentialMovingStatistics(delay=30) + assert not math.isnan(exp_stats.delay) + assert not math.isnan(exp_stats._current_time) + assert math.isnan(exp_stats._time_diff) + exp_stats.freeze() + assert not math.isnan(exp_stats._time_diff) + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_exponential_statistics_freeze_unfreeze(ExponentialMovingStatistics): + time_patch = get_time_patch(ExponentialMovingStatistics) + with time_patch as time_mock: + time_mock.time.return_value = 1000.1 + exp_stats = ExponentialMovingStatistics(delay=30) + + assert exp_stats._current_time == 1000.1 + assert math.isnan(exp_stats._time_diff) + assert not exp_stats.is_freezed() + + time_mock.time.return_value = 1010.1 + exp_stats.freeze() + assert exp_stats._time_diff == 10.0 + assert exp_stats.is_freezed() + + time_mock.time.return_value = 1110.0 + exp_stats.unfreeze() + assert exp_stats._current_time == 1100.0 + assert not exp_stats.is_freezed() + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_exponential_statistics_time_based_on_off(ExponentialMovingStatistics): + random.seed(0) + alpha = [random.random() for _ in range(count)] + exp_stats = ExponentialMovingStatistics(iterable=alpha) + + assert math.isnan(exp_stats.delay) + assert math.isnan(exp_stats._current_time) + exp_stats.delay = 30 + assert exp_stats.delay == 30 + assert not math.isnan(exp_stats._current_time) + current_time = exp_stats._current_time + exp_stats.delay = 60 + assert exp_stats.delay == 60 + assert exp_stats._current_time == current_time + exp_stats.delay = None + assert math.isnan(exp_stats.delay) + assert math.isnan(exp_stats._current_time) + + exp_stats_time_init = ExponentialMovingStatistics( + delay=300, iterable=alpha + ) + assert exp_stats_time_init.mean() == exp_stats.mean() + assert exp_stats_time_init.variance() == exp_stats.variance() + assert exp_stats_time_init.delay == 300 + assert not math.isnan(exp_stats_time_init._current_time) + + exp_stats.push(10) + exp_stats_time_init.push(10) + assert exp_stats.mean() != exp_stats_time_init.mean() + assert exp_stats.variance() != exp_stats_time_init.variance() + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_exponential_statistics_time_based_effective_decay( + ExponentialMovingStatistics, +): + time_patch = get_time_patch(ExponentialMovingStatistics) + with time_patch as time_mock: + delay = 0.5 + nominal_decay = 0.9 + + past = 100.0 + time_mock.time.return_value = past + exp_stats_time = ExponentialMovingStatistics(delay=0.5) + assert exp_stats_time._current_time == past + now = 110.0 + time_mock.time.return_value = now + expected_effective_decay = calc_effective_decay( + now - past, delay, nominal_decay + ) + true_effective_decay = exp_stats_time._effective_decay() + + assert expected_effective_decay == true_effective_decay + assert exp_stats_time._current_time == now + + later = 120.0 + time_mock.time.return_value = later + exp_stats_time.freeze() + diff = later - now + expected_effective_decay = calc_effective_decay( + diff, delay, nominal_decay + ) + true_effective_decay = exp_stats_time._effective_decay() + + assert expected_effective_decay == true_effective_decay + assert exp_stats_time._current_time == later + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_raise_if_invalid_decay_exp_stats(ExponentialMovingStatistics): + with pytest.raises(ValueError): + ExponentialMovingStatistics(0) + with pytest.raises(ValueError): + ExponentialMovingStatistics(1) + with pytest.raises(ValueError): + ExponentialMovingStatistics(-1) + with pytest.raises(ValueError): + ExponentialMovingStatistics(2) + + +@pytest.mark.parametrize( + 'ExponentialMovingCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_raise_if_invalid_decay_exp_cov(ExponentialMovingCovariance): + with pytest.raises(ValueError): + ExponentialMovingCovariance(0) + with pytest.raises(ValueError): + ExponentialMovingCovariance(1) + with pytest.raises(ValueError): + ExponentialMovingCovariance(-1) + with pytest.raises(ValueError): + ExponentialMovingCovariance(2) + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_raise_if_not_time_exp_stats(ExponentialMovingStatistics): + exp_stats = ExponentialMovingStatistics() + exp_stats_time = ExponentialMovingStatistics(delay=60) + with pytest.raises(AttributeError): + exp_stats.clear_timer() + with pytest.raises(AttributeError): + exp_stats._effective_decay() + with pytest.raises(AttributeError): + exp_stats.freeze() + with pytest.raises(AttributeError): + exp_stats.unfreeze() + with pytest.raises(AttributeError): + exp_stats.is_freezed() + with pytest.raises(AttributeError): + exp_stats_time.unfreeze() + + with pytest.raises(ValueError): + exp_stats_time.delay = 0 + with pytest.raises(ValueError): + exp_stats_time.delay = -1